Spaces:

CIIRC-NLP
/

czechbench_leaderboard

Running

App Files Community

davidadamczyk committed on Sep 6, 2024

Commit

8fae53c

1 Parent(s): 5812da8

Update form

Browse files

Files changed (2) hide show

app.py +5 -4
src/submission/submit.py +40 -11

app.py CHANGED Viewed

@@ -63,6 +63,7 @@ def download_data():
 download_data()
 """
 (
     finished_eval_queue_df,
@@ -338,12 +339,12 @@ with demo:
             submit_button = gr.Button("Submit Eval", interactive=True)
             submission_result = gr.Markdown()
             submit_button.click(
-                add_new_eval,
-                [
                     model_name_textbox,
                     upload_button
                 ],
-                [submission_result],
             )
     with gr.Row():
@@ -357,6 +358,6 @@ with demo:
             )
 scheduler = BackgroundScheduler()
-scheduler.add_job(restart_space, "interval", seconds=1800)
 scheduler.start()
 demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0")

 download_data()
 """
 (
     finished_eval_queue_df,
             submit_button = gr.Button("Submit Eval", interactive=True)
             submission_result = gr.Markdown()
             submit_button.click(
+                fn = add_new_eval,
+                inputs = [
                     model_name_textbox,
                     upload_button
                 ],
+                outputs = [submission_result, model_name_textbox],
             )
     with gr.Row():
             )
 scheduler = BackgroundScheduler()
+scheduler.add_job(restart_space, "interval", seconds=60)
 scheduler.start()
 demo.queue(default_concurrency_limit=40).launch(server_name="0.0.0.0")

src/submission/submit.py CHANGED Viewed

@@ -1,5 +1,6 @@
 import json
 import os
 from datetime import datetime, timezone
 import numpy as np
 import pandas as pd
@@ -30,22 +31,50 @@ def add_new_eval(
     results = data['results']
     ret = {"eval_name": eval_name}
     # TODO add complex validation
-    print(results.keys())
-    print(BENCHMARK_COLS)
-    for input_col in results.keys():
-        if input_col not in BENCHMARK_COLS:
-            print(input_col)
-            return styled_error(f'Missing: {input_col}')
-    ret.update({i:j['acc,none'] for i,j in results.items()})
     # fake data for testing...
     #ret.update({i:round(np.random.normal(1, 0.5, 1)[0], 2) for i,j in results.items()})
-    user_name = "davidadamczyk"
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
     out_path = f"{OUT_DIR}/{eval_name}_eval_request.json"
     with open(out_path, "w") as f:
@@ -58,7 +87,7 @@ def add_new_eval(
     print("path_in_repo: ",out_path.split("eval-queue/")[1])
     print("repo_id: ", RESULTS_REPO)
     print("repo_type: ", "dataset")
     API.upload_file(
         path_or_fileobj=out_path,
         path_in_repo=out_path.split("eval-queue/")[1],
@@ -163,4 +192,4 @@ def add_new_eval(
     """
     return styled_message(
         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
-    )

 import json
 import os
+from glob import glob
 from datetime import datetime, timezone
 import numpy as np
 import pandas as pd
     results = data['results']
+    acc_keys = ['exact_match,none', 'exact_match,flexible-extract', 'exact_match,strict-match']
     ret = {"eval_name": eval_name}
+    for k, v in results.items():
+        for acc_k in acc_keys:
+            if acc_k in v and k in BENCHMARK_COLS:
+                ret[k] = v[acc_k]
+    #validation
+    for k,v in ret.items():
+        if k == "eval_name":
+            continue
+        if k not in BENCHMARK_COLS:
+            print(f"Missing: {k}")
+            return styled_error(f'Missing: {k}')
+    if len(BENCHMARK_COLS) != len(ret) - 1:
+        print(f"Missing columns")
+        return styled_error(f'Missing columns')
     # TODO add complex validation
+    #print(results.keys())
+    #print(BENCHMARK_COLS)
+    #for input_col in results.keys():
+    #    if input_col not in BENCHMARK_COLS:
+    #        print(input_col)
+    #        return styled_error(f'Missing: {input_col}')
+    #ret.update({i:j['acc,none'] for i,j in results.items()})
     # fake data for testing...
     #ret.update({i:round(np.random.normal(1, 0.5, 1)[0], 2) for i,j in results.items()})
+    user_name = "czechbench_leaderboard"
     OUT_DIR = f"{EVAL_REQUESTS_PATH}/{user_name}"
+    existing_eval_names = []
+    for fname in glob(f"{OUT_DIR}/*.json"):
+        with open(fname, mode="r") as f:
+            existing_eval = json.load(f)
+        existing_eval_names.append(existing_eval['eval_name'])
+    if ret['eval_name'] in existing_eval_names:
+        print(f"Model name {ret['eval_name']} is used!")
+        return styled_error(f"Model name {ret['eval_name']} is used!")
     out_path = f"{OUT_DIR}/{eval_name}_eval_request.json"
     with open(out_path, "w") as f:
     print("path_in_repo: ",out_path.split("eval-queue/")[1])
     print("repo_id: ", RESULTS_REPO)
     print("repo_type: ", "dataset")
     API.upload_file(
         path_or_fileobj=out_path,
         path_in_repo=out_path.split("eval-queue/")[1],
     """
     return styled_message(
         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
+    ), ""