davidadamczyk committed · Commit ebe77ac · 1 Parent(s): 8fae53c

Add precision and hf_model_id

Files changed:
- app.py (+8, -3)
- src/display/utils.py (+2, -0)
- src/populate.py (+4, -3)
- src/submission/submit.py (+17, -8)
app.py CHANGED

@@ -304,7 +304,9 @@ with demo:
     with gr.Row():
         with gr.Column():
             model_name_textbox = gr.Textbox(label="Model name")
-
+            precision = gr.Radio(["bfloat16", "float16", "4bit"], label="Precision", info="What precision are you using for inference?")
+            hf_model_id = gr.Textbox(label="Huggingface Model ID")
+            contact_email = gr.Textbox(label="E-Mail")
             file_output = gr.File()
             upload_button = gr.UploadButton("Upload json", file_types=['.json'])
             upload_button.upload(validate_upload, upload_button, file_output)
@@ -342,9 +344,12 @@ with demo:
         fn = add_new_eval,
         inputs = [
             model_name_textbox,
-            upload_button
+            upload_button,
+            precision,
+            hf_model_id,
+            contact_email
         ],
-        outputs = [submission_result, model_name_textbox],
+        outputs = [submission_result, model_name_textbox, precision, hf_model_id, contact_email],
     )
 
     with gr.Row():
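For readers unfamiliar with Gradio's event wiring, the pattern this change extends is sketched below. This is an illustrative, self-contained reduction, not the leaderboard's actual code: each component in `inputs` is passed positionally to the callback, and each returned value is written back to the matching component in `outputs`, which is why the callback now returns four extra empty strings to clear the new fields.

import gradio as gr

# Illustrative reduction of the wiring above (not the app's real callback).
def add_new_eval_demo(model_name, upload, precision, hf_model_id, contact_email):
    status = f"Queued {model_name} ({precision}) as {hf_model_id}"
    # One return value per `outputs` entry; the empty strings mirror the
    # commit's return shape and reset the input widgets after submission.
    return status, "", "", "", ""

with gr.Blocks() as demo:
    model_name_textbox = gr.Textbox(label="Model name")
    precision = gr.Radio(["bfloat16", "float16", "4bit"], label="Precision")
    hf_model_id = gr.Textbox(label="Huggingface Model ID")
    contact_email = gr.Textbox(label="E-Mail")
    upload_button = gr.UploadButton("Upload json", file_types=[".json"])
    submission_result = gr.Markdown()
    submit_button = gr.Button("Submit")
    submit_button.click(
        fn=add_new_eval_demo,
        inputs=[model_name_textbox, upload_button, precision, hf_model_id, contact_email],
        outputs=[submission_result, model_name_textbox, precision, hf_model_id, contact_email],
    )

if __name__ == "__main__":
    demo.launch()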
src/display/utils.py CHANGED

@@ -48,6 +48,8 @@ auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_
 """
 
 auto_eval_column_dict.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True, never_hidden=True)])
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("precision", "str", True)])
+auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str", True)])
 auto_eval_column_dict.append(["agree_cs", ColumnContent, ColumnContent("agree_cs", "number", True)])
 auto_eval_column_dict.append(["anli_cs", ColumnContent, ColumnContent("anli_cs", "number", True)])
 auto_eval_column_dict.append(["arc_challenge_cs", ColumnContent, ColumnContent("arc_challenge_cs", "number", True)])
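The two new entries follow the column-declaration pattern of the stock Hugging Face leaderboard template, where each column is a small dataclass and the whole list is frozen into an AutoEvalColumn namespace. The sketch below assumes this repo matches that template; field names may differ in detail.

from dataclasses import dataclass, make_dataclass

# Assumed ColumnContent definition, per the stock HF leaderboard template.
@dataclass(frozen=True)
class ColumnContent:
    name: str                  # key of the column in the results dataframe
    type: str                  # "str" or "number", drives dataframe typing
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

auto_eval_column_dict = []
auto_eval_column_dict.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True, never_hidden=True)])
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("precision", "str", True)])
auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str", True)])

# Each [attr, type, default] triple becomes a field of a frozen namespace class:
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
print(AutoEvalColumn.precision.name)  # -> precision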
src/populate.py CHANGED

@@ -1,6 +1,6 @@
 import json
 import os
-
+import numpy as np
 import pandas as pd
 
 from src.display.formatting import has_no_nan_values, make_clickable_model
@@ -14,16 +14,17 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     df = pd.DataFrame.from_records(raw_data)
     #df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
-
+    df.replace(r'\s+', np.nan, regex=True)
     # filter out if any of the benchmarks have not been produced
     df = df[has_no_nan_values(df, benchmark_cols)]
+
     return raw_data, df
 
 
 def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
     all_evals = []
+
     for entry in entries:
         if ".json" in entry:
             file_path = os.path.join(save_path, entry)
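One caveat worth flagging in the new replace call: pandas DataFrame.replace returns a new frame rather than mutating in place, and the commit discards that return value, so the added line has no effect on df as written. A minimal sketch of the presumably intended behavior (assignment added; column names are illustrative):

import numpy as np
import pandas as pd

# Hypothetical frame: one benchmark cell holds only whitespace.
df = pd.DataFrame({"agree_cs": [0.71, "  "], "anli_cs": [0.55, 0.41]})

# As committed, the result is discarded and df is unchanged:
df.replace(r'\s+', np.nan, regex=True)

# Presumably intended (assumption): keep the result, so whitespace-only
# cells become NaN and the has_no_nan_values() filter can drop those rows.
df = df.replace(r'\s+', np.nan, regex=True)
print(df)
#   agree_cs  anli_cs
# 0     0.71     0.55
# 1      NaN     0.41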
src/submission/submit.py CHANGED

@@ -24,8 +24,12 @@ USERS_TO_SUBMISSION_DATES = None
 
 def add_new_eval(
     eval_name: str,
-    upload: object
+    upload: object,
+    precision: str,
+    hf_model_id: str,
+    contact_email: str
 ):
+
     with open(upload, mode="r") as f:
         data = json.load(f)
 
@@ -33,21 +37,26 @@
 
     acc_keys = ['exact_match,none', 'exact_match,flexible-extract', 'exact_match,strict-match']
 
-    ret = {'eval_name': eval_name}
+    ret = {
+        'eval_name': eval_name,
+        'precision': precision,
+        'hf_model_id': hf_model_id,
+        'contact_email': contact_email
+    }
+
     for k, v in results.items():
         for acc_k in acc_keys:
             if acc_k in v and k in BENCHMARK_COLS:
                 ret[k] = v[acc_k]
-
     #validation
     for k,v in ret.items():
-        if k == 'eval_name':
+        if k in ['eval_name', 'precision', 'hf_model_id', 'contact_email']:
             continue
         if k not in BENCHMARK_COLS:
             print(f"Missing: {k}")
             return styled_error(f'Missing: {k}')
 
-    if len(BENCHMARK_COLS) != len(ret) - 1:
+    if len(BENCHMARK_COLS) != len(ret) - 4:
         print(f"Missing columns")
         return styled_error(f'Missing columns')
 
@@ -87,8 +96,8 @@
     print("path_in_repo: ",out_path.split("eval-queue/")[1])
     print("repo_id: ", RESULTS_REPO)
     print("repo_type: ", "dataset")
-
-    API.upload_file(
+
+    response = API.upload_file(
         path_or_fileobj=out_path,
         path_in_repo=out_path.split("eval-queue/")[1],
         repo_id=RESULTS_REPO,
@@ -192,4 +201,4 @@
     """
     return styled_message(
         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
-    ), ""
+    ), "", "", "", ""
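The updated count check relies on ret carrying exactly the four metadata keys alongside one score per benchmark column, hence len(ret) - 4. A small illustrative variant (hypothetical helper, not in the repo) would report which benchmark columns are missing instead of only counting them:

# Illustrative restatement of the validation invariant above: after the
# merge loop, ret holds the 4 metadata keys plus one score per benchmark,
# so a complete submission satisfies len(ret) - 4 == len(BENCHMARK_COLS).
META_KEYS = {'eval_name', 'precision', 'hf_model_id', 'contact_email'}

def missing_benchmarks(ret: dict, benchmark_cols: list[str]) -> set[str]:
    """Return the benchmark columns absent from a parsed submission."""
    return set(benchmark_cols) - (set(ret) - META_KEYS)

# Example: one benchmark score missing from the uploaded json.
ret = {'eval_name': 'demo', 'precision': 'bfloat16',
       'hf_model_id': 'org/model', 'contact_email': 'user@example.com',
       'agree_cs': 0.7, 'anli_cs': 0.5}
print(missing_benchmarks(ret, ['agree_cs', 'anli_cs', 'arc_challenge_cs']))
# -> {'arc_challenge_cs'}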