davidadamczyk
committed on
Commit
·
8fae53c
1
Parent(s):
5812da8
Update form
Browse files
- app.py +5 -4
- src/submission/submit.py +40 -11
app.py
CHANGED
@@ -63,6 +63,7 @@ def download_data():
|
|
63 |
|
64 |
download_data()
|
65 |
|
|
|
66 |
"""
|
67 |
(
|
68 |
finished_eval_queue_df,
|
@@ -338,12 +339,12 @@ with demo:
|
|
338 |
submit_button = gr.Button("Submit Eval", interactive=True)
|
339 |
submission_result = gr.Markdown()
|
340 |
submit_button.click(
|
341 |
-
add_new_eval,
|
342 |
-
[
|
343 |
model_name_textbox,
|
344 |
upload_button
|
345 |
],
|
346 |
-
[submission_result],
|
347 |
)
|
348 |
|
349 |
with gr.Row():
|
@@ -357,6 +358,6 @@ with demo:
|
|
357 |
)
|
358 |
|
359 |
scheduler = BackgroundScheduler()
|
360 |
-
scheduler.add_job(restart_space, "interval", seconds=
|
361 |
scheduler.start()
|
362 |
demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0")
|
|
|
63 |
|
64 |
download_data()
|
65 |
|
66 |
+
|
67 |
"""
|
68 |
(
|
69 |
finished_eval_queue_df,
|
|
|
339 |
submit_button = gr.Button("Submit Eval", interactive=True)
|
340 |
submission_result = gr.Markdown()
|
341 |
submit_button.click(
|
342 |
+
fn = add_new_eval,
|
343 |
+
inputs = [
|
344 |
model_name_textbox,
|
345 |
upload_button
|
346 |
],
|
347 |
+
outputs = [submission_result, model_name_textbox],
|
348 |
)
|
349 |
|
350 |
with gr.Row():
|
|
|
358 |
)
|
359 |
|
360 |
scheduler = BackgroundScheduler()
|
361 |
+
scheduler.add_job(restart_space, "interval", seconds=60)
|
362 |
scheduler.start()
|
363 |
demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0")
|
src/submission/submit.py
CHANGED
@@ -1,5 +1,6 @@
|
|
1 |
import json
|
2 |
import os
|
|
|
3 |
from datetime import datetime, timezone
|
4 |
import numpy as np
|
5 |
import pandas as pd
|
@@ -30,22 +31,50 @@ def add_new_eval(
|
|
30 |
|
31 |
results = data['results']
|
32 |
|
|
|
|
|
33 |
ret = {"eval_name": eval_name}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
34 |
# TODO add complex validation
|
35 |
-
print(results.keys())
|
36 |
-
print(BENCHMARK_COLS)
|
37 |
-
for input_col in results.keys():
|
38 |
-
|
39 |
-
|
40 |
-
|
41 |
-
ret.update({i:j['acc,none'] for i,j in results.items()})
|
42 |
# fake data for testing...
|
43 |
#ret.update({i:round(np.random.normal(1, 0.5, 1)[0], 2) for i,j in results.items()})
|
44 |
|
45 |
-
|
46 |
-
user_name = "davidadamczyk"
|
47 |
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
48 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
49 |
out_path = f"{OUT_DIR}/{eval_name}_eval_request.json"
|
50 |
|
51 |
with open(out_path, "w") as f:
|
@@ -58,7 +87,7 @@ def add_new_eval(
|
|
58 |
print("path_in_repo: ",out_path.split("eval-queue/")[1])
|
59 |
print("repo_id: ", RESULTS_REPO)
|
60 |
print("repo_type: ", "dataset")
|
61 |
-
|
62 |
API.upload_file(
|
63 |
path_or_fileobj=out_path,
|
64 |
path_in_repo=out_path.split("eval-queue/")[1],
|
@@ -163,4 +192,4 @@ def add_new_eval(
|
|
163 |
"""
|
164 |
return styled_message(
|
165 |
"Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
|
166 |
-
)
|
|
|
1 |
import json
|
2 |
import os
|
3 |
+
from glob import glob
|
4 |
from datetime import datetime, timezone
|
5 |
import numpy as np
|
6 |
import pandas as pd
|
|
|
31 |
|
32 |
results = data['results']
|
33 |
|
34 |
+
acc_keys = ['exact_match,none', 'exact_match,flexible-extract', 'exact_match,strict-match']
|
35 |
+
|
36 |
ret = {"eval_name": eval_name}
|
37 |
+
for k, v in results.items():
|
38 |
+
for acc_k in acc_keys:
|
39 |
+
if acc_k in v and k in BENCHMARK_COLS:
|
40 |
+
ret[k] = v[acc_k]
|
41 |
+
|
42 |
+
#validation
|
43 |
+
for k,v in ret.items():
|
44 |
+
if k == "eval_name":
|
45 |
+
continue
|
46 |
+
if k not in BENCHMARK_COLS:
|
47 |
+
print(f"Missing: {k}")
|
48 |
+
return styled_error(f'Missing: {k}')
|
49 |
+
|
50 |
+
if len(BENCHMARK_COLS) != len(ret) - 1:
|
51 |
+
print(f"Missing columns")
|
52 |
+
return styled_error(f'Missing columns')
|
53 |
+
|
54 |
# TODO add complex validation
|
55 |
+
#print(results.keys())
|
56 |
+
#print(BENCHMARK_COLS)
|
57 |
+
#for input_col in results.keys():
|
58 |
+
# if input_col not in BENCHMARK_COLS:
|
59 |
+
# print(input_col)
|
60 |
+
# return styled_error(f'Missing: {input_col}')
|
61 |
+
#ret.update({i:j['acc,none'] for i,j in results.items()})
|
62 |
# fake data for testing...
|
63 |
#ret.update({i:round(np.random.normal(1, 0.5, 1)[0], 2) for i,j in results.items()})
|
64 |
|
65 |
+
user_name = "czechbench_leaderboard"
|
|
|
66 |
OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
|
67 |
|
68 |
+
existing_eval_names = []
|
69 |
+
for fname in glob(f"{OUT_DIR}/*.json"):
|
70 |
+
with open(fname, mode="r") as f:
|
71 |
+
existing_eval = json.load(f)
|
72 |
+
existing_eval_names.append(existing_eval['eval_name'])
|
73 |
+
|
74 |
+
if ret['eval_name'] in existing_eval_names:
|
75 |
+
print(f"Model name {ret['eval_name']} is used!")
|
76 |
+
return styled_error(f"Model name {ret['eval_name']} is used!")
|
77 |
+
|
78 |
out_path = f"{OUT_DIR}/{eval_name}_eval_request.json"
|
79 |
|
80 |
with open(out_path, "w") as f:
|
|
|
87 |
print("path_in_repo: ",out_path.split("eval-queue/")[1])
|
88 |
print("repo_id: ", RESULTS_REPO)
|
89 |
print("repo_type: ", "dataset")
|
90 |
+
|
91 |
API.upload_file(
|
92 |
path_or_fileobj=out_path,
|
93 |
path_in_repo=out_path.split("eval-queue/")[1],
|
|
|
192 |
"""
|
193 |
return styled_message(
|
194 |
"Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
|
195 |
+
), ""
|