richardblythman
committed on
Commit
·
7d57619
1
Parent(s):
6bec1f5
add open source lms
Browse files
- app.py +16 -13
- tabs/run_benchmark.py +4 -2
app.py
CHANGED
@@ -17,14 +17,14 @@ from tabs.run_benchmark import run_benchmark_main
|
|
17 |
demo = gr.Blocks()
|
18 |
|
19 |
|
20 |
-
def run_benchmark_gradio(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key):
|
21 |
"""Run the benchmark using inputs."""
|
22 |
if tool_name is None:
|
23 |
return "Please enter the name of your tool."
|
24 |
-
if openai_api_key is None and anthropic_api_key is None:
|
25 |
-
return "Please enter either OpenAI or Anthropic API key."
|
26 |
|
27 |
-
result = run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key)
|
28 |
if result == 'completed':
|
29 |
# get the results file in the results directory
|
30 |
fns = glob('results/*.csv')
|
@@ -101,16 +101,15 @@ with demo:
|
|
101 |
[
|
102 |
"prediction-offline",
|
103 |
"prediction-online",
|
104 |
-
"prediction-
|
105 |
-
"prediction-
|
106 |
-
"
|
107 |
-
"claude-prediction-online",
|
108 |
'prediction-request-rag',
|
109 |
-
|
110 |
-
"prediction-with-research-bold",
|
111 |
-
"prediction-request-reasoning-claude",
|
112 |
-
"prediction-request-rag-claude",
|
113 |
"prediction-url-cot-claude",
|
|
|
|
|
|
|
114 |
], label="Tool Name", info="Choose the tool to run")
|
115 |
model_name = gr.Dropdown([
|
116 |
"gpt-3.5-turbo-0125",
|
@@ -118,10 +117,14 @@ with demo:
|
|
118 |
"claude-3-haiku-20240307",
|
119 |
"claude-3-sonnet-20240229",
|
120 |
"claude-3-opus-20240229",
|
|
|
|
|
|
|
121 |
], label="Model Name", info="Choose the model to use")
|
122 |
with gr.Row():
|
123 |
openai_api_key = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key here", type="password")
|
124 |
anthropic_api_key = gr.Textbox(label="Anthropic API Key", placeholder="Enter your Anthropic API key here", type="password")
|
|
|
125 |
with gr.Row():
|
126 |
num_questions = gr.Slider(
|
127 |
minimum=1,
|
@@ -139,7 +142,7 @@ with demo:
|
|
139 |
summary = gr.Dataframe()
|
140 |
|
141 |
run_button.click(run_benchmark_gradio,
|
142 |
-
inputs=[tool_name, model_name, num_questions, openai_api_key, anthropic_api_key],
|
143 |
outputs=[result, summary])
|
144 |
|
145 |
demo.queue(default_concurrency_limit=40).launch()
|
|
|
17 |
demo = gr.Blocks()
|
18 |
|
19 |
|
20 |
+
def run_benchmark_gradio(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key, openrouter_api_key):
|
21 |
"""Run the benchmark using inputs."""
|
22 |
if tool_name is None:
|
23 |
return "Please enter the name of your tool."
|
24 |
+
if openai_api_key is None and anthropic_api_key is None and openrouter_api_key is None:
|
25 |
+
return "Please enter either OpenAI or Anthropic or OpenRouter API key."
|
26 |
|
27 |
+
result = run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key, openrouter_api_key)
|
28 |
if result == 'completed':
|
29 |
# get the results file in the results directory
|
30 |
fns = glob('results/*.csv')
|
|
|
101 |
[
|
102 |
"prediction-offline",
|
103 |
"prediction-online",
|
104 |
+
# "prediction-online-summarized-info",
|
105 |
+
# "prediction-offline-sme",
|
106 |
+
# "prediction-online-sme",
|
|
|
107 |
'prediction-request-rag',
|
108 |
+
'prediction-request-reasoning',
|
|
|
|
|
|
|
109 |
"prediction-url-cot-claude",
|
110 |
+
# "prediction-request-rag-cohere",
|
111 |
+
# "prediction-with-research-conservative",
|
112 |
+
# "prediction-with-research-bold",
|
113 |
], label="Tool Name", info="Choose the tool to run")
|
114 |
model_name = gr.Dropdown([
|
115 |
"gpt-3.5-turbo-0125",
|
|
|
117 |
"claude-3-haiku-20240307",
|
118 |
"claude-3-sonnet-20240229",
|
119 |
"claude-3-opus-20240229",
|
120 |
+
"databricks/dbrx-instruct:nitro",
|
121 |
+
"nousresearch/nous-hermes-2-mixtral-8x7b-sft",
|
122 |
+
# "cohere/command-r-plus",
|
123 |
], label="Model Name", info="Choose the model to use")
|
124 |
with gr.Row():
|
125 |
openai_api_key = gr.Textbox(label="OpenAI API Key", placeholder="Enter your OpenAI API key here", type="password")
|
126 |
anthropic_api_key = gr.Textbox(label="Anthropic API Key", placeholder="Enter your Anthropic API key here", type="password")
|
127 |
+
openrouter_api_key = gr.Textbox(label="OpenRouter API Key", placeholder="Enter your OpenRouter API key here", type="password")
|
128 |
with gr.Row():
|
129 |
num_questions = gr.Slider(
|
130 |
minimum=1,
|
|
|
142 |
summary = gr.Dataframe()
|
143 |
|
144 |
run_button.click(run_benchmark_gradio,
|
145 |
+
inputs=[tool_name, model_name, num_questions, openai_api_key, anthropic_api_key, openrouter_api_key],
|
146 |
outputs=[result, summary])
|
147 |
|
148 |
demo.queue(default_concurrency_limit=40).launch()
|
tabs/run_benchmark.py
CHANGED
@@ -2,7 +2,7 @@ import os
|
|
2 |
from benchmark.run_benchmark import run_benchmark
|
3 |
|
4 |
|
5 |
-
def run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key):
|
6 |
"""Run the benchmark using the provided function and API key."""
|
7 |
# Empty the results directory
|
8 |
os.system("rm -rf results/*")
|
@@ -20,7 +20,9 @@ def run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, ant
|
|
20 |
kwargs["api_keys"]["openai"] = openai_api_key
|
21 |
if anthropic_api_key:
|
22 |
kwargs["api_keys"]["anthropic"] = anthropic_api_key
|
23 |
-
|
|
|
|
|
24 |
kwargs["num_urls"] = 3
|
25 |
kwargs["num_words"] = 300
|
26 |
kwargs["provide_source_links"] = True
|
|
|
2 |
from benchmark.run_benchmark import run_benchmark
|
3 |
|
4 |
|
5 |
+
def run_benchmark_main(tool_name, model_name, num_questions, openai_api_key, anthropic_api_key, openrouter_api_key):
|
6 |
"""Run the benchmark using the provided function and API key."""
|
7 |
# Empty the results directory
|
8 |
os.system("rm -rf results/*")
|
|
|
20 |
kwargs["api_keys"]["openai"] = openai_api_key
|
21 |
if anthropic_api_key:
|
22 |
kwargs["api_keys"]["anthropic"] = anthropic_api_key
|
23 |
+
if openrouter_api_key:
|
24 |
+
kwargs["api_keys"]["openrouter"] = openrouter_api_key
|
25 |
+
|
26 |
kwargs["num_urls"] = 3
|
27 |
kwargs["num_words"] = 300
|
28 |
kwargs["provide_source_links"] = True
|