davidadamczyk committed · Commit ebe77ac · 1 Parent(s): 8fae53c

Add precision and hf_model_id

Files changed:
- app.py (+8, -3)
- src/display/utils.py (+2, -0)
- src/populate.py (+4, -3)
- src/submission/submit.py (+17, -8)
app.py CHANGED

@@ -304,7 +304,9 @@ with demo:
     with gr.Row():
         with gr.Column():
             model_name_textbox = gr.Textbox(label="Model name")
-
+            precision = gr.Radio(["bfloat16", "float16", "4bit"], label="Precision", info="What precision are you using for inference?")
+            hf_model_id = gr.Textbox(label="Huggingface Model ID")
+            contact_email = gr.Textbox(label="E-Mail")
             file_output = gr.File()
             upload_button = gr.UploadButton("Upload json", file_types=['.json'])
             upload_button.upload(validate_upload, upload_button, file_output)
@@ -342,9 +344,12 @@ with demo:
         fn = add_new_eval,
         inputs = [
             model_name_textbox,
-            upload_button
+            upload_button,
+            precision,
+            hf_model_id,
+            contact_email
         ],
-        outputs = [submission_result, model_name_textbox],
+        outputs = [submission_result, model_name_textbox, precision, hf_model_id, contact_email],
     )
 
     with gr.Row():
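For readers unfamiliar with Gradio's event wiring, the pattern this change extends is sketched below. This is an illustrative, self-contained reduction, not the leaderboard's actual code: each component in `inputs` is passed positionally to the callback, and each returned value is written back to the matching component in `outputs`, which is why the callback now returns four extra empty strings to clear the new fields.

import gradio as gr

# Illustrative reduction of the wiring above (not the app's real callback).
def add_new_eval_demo(model_name, upload, precision, hf_model_id, contact_email):
    status = f"Queued {model_name} ({precision}) as {hf_model_id}"
    # One return value per `outputs` entry; the empty strings mirror the
    # commit's return shape and reset the input widgets after submission.
    return status, "", "", "", ""

with gr.Blocks() as demo:
    model_name_textbox = gr.Textbox(label="Model name")
    precision = gr.Radio(["bfloat16", "float16", "4bit"], label="Precision")
    hf_model_id = gr.Textbox(label="Huggingface Model ID")
    contact_email = gr.Textbox(label="E-Mail")
    upload_button = gr.UploadButton("Upload json", file_types=[".json"])
    submission_result = gr.Markdown()
    submit_button = gr.Button("Submit")
    submit_button.click(
        fn=add_new_eval_demo,
        inputs=[model_name_textbox, upload_button, precision, hf_model_id, contact_email],
        outputs=[submission_result, model_name_textbox, precision, hf_model_id, contact_email],
    )

if __name__ == "__main__":
    demo.launch()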
src/display/utils.py CHANGED

@@ -48,6 +48,8 @@ auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_
 """
 
 auto_eval_column_dict.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True, never_hidden=True)])
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("precision", "str", True)])
+auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str", True)])
 auto_eval_column_dict.append(["agree_cs", ColumnContent, ColumnContent("agree_cs", "number", True)])
 auto_eval_column_dict.append(["anli_cs", ColumnContent, ColumnContent("anli_cs", "number", True)])
 auto_eval_column_dict.append(["arc_challenge_cs", ColumnContent, ColumnContent("arc_challenge_cs", "number", True)])
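The two new entries follow the column-declaration pattern of the stock Hugging Face leaderboard template, where each column is a small dataclass and the whole list is frozen into an AutoEvalColumn namespace. The sketch below assumes this repo matches that template; field names may differ in detail.

from dataclasses import dataclass, make_dataclass

# Assumed ColumnContent definition, per the stock HF leaderboard template.
@dataclass(frozen=True)
class ColumnContent:
    name: str                  # key of the column in the results dataframe
    type: str                  # "str" or "number", drives dataframe typing
    displayed_by_default: bool
    hidden: bool = False
    never_hidden: bool = False

auto_eval_column_dict = []
auto_eval_column_dict.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True, never_hidden=True)])
auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("precision", "str", True)])
auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str", True)])

# Each [attr, type, default] triple becomes a field of a frozen namespace class:
AutoEvalColumn = make_dataclass("AutoEvalColumn", auto_eval_column_dict, frozen=True)
print(AutoEvalColumn.precision.name)  # -> precision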
src/populate.py CHANGED

@@ -1,6 +1,6 @@
 import json
 import os
-
+import numpy as np
 import pandas as pd
 
 from src.display.formatting import has_no_nan_values, make_clickable_model
@@ -14,16 +14,17 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     df = pd.DataFrame.from_records(raw_data)
     #df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
-
+    df.replace(r'\s+', np.nan, regex=True)
     # filter out if any of the benchmarks have not been produced
     df = df[has_no_nan_values(df, benchmark_cols)]
+
     return raw_data, df
 
 
 def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
     all_evals = []
+
     for entry in entries:
         if ".json" in entry:
             file_path = os.path.join(save_path, entry)
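One caveat worth flagging in the new replace call: pandas DataFrame.replace returns a new frame rather than mutating in place, and the commit discards that return value, so the added line has no effect on df as written. A minimal sketch of the presumably intended behavior (assignment added; column names are illustrative):

import numpy as np
import pandas as pd

# Hypothetical frame: one benchmark cell holds only whitespace.
df = pd.DataFrame({"agree_cs": [0.71, "  "], "anli_cs": [0.55, 0.41]})

# As committed, the result is discarded and df is unchanged:
df.replace(r'\s+', np.nan, regex=True)

# Presumably intended (assumption): keep the result, so whitespace-only
# cells become NaN and the has_no_nan_values() filter can drop those rows.
df = df.replace(r'\s+', np.nan, regex=True)
print(df)
#   agree_cs  anli_cs
# 0     0.71     0.55
# 1      NaN     0.41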
src/submission/submit.py CHANGED

@@ -24,8 +24,12 @@ USERS_TO_SUBMISSION_DATES = None
 
 def add_new_eval(
     eval_name: str,
-    upload: object
+    upload: object,
+    precision: str,
+    hf_model_id: str,
+    contact_email: str
 ):
+
     with open(upload, mode="r") as f:
         data = json.load(f)
 
@@ -33,21 +37,26 @@
 
     acc_keys = ['exact_match,none', 'exact_match,flexible-extract', 'exact_match,strict-match']
 
-    ret = {'eval_name': eval_name}
+    ret = {
+        'eval_name': eval_name,
+        'precision': precision,
+        'hf_model_id': hf_model_id,
+        'contact_email': contact_email
+    }
+
     for k, v in results.items():
         for acc_k in acc_keys:
             if acc_k in v and k in BENCHMARK_COLS:
                 ret[k] = v[acc_k]
-
     #validation
     for k,v in ret.items():
-        if k == 'eval_name':
+        if k in ['eval_name', 'precision', 'hf_model_id', 'contact_email']:
             continue
         if k not in BENCHMARK_COLS:
             print(f"Missing: {k}")
             return styled_error(f'Missing: {k}')
 
-    if len(BENCHMARK_COLS) != len(ret) - 1:
+    if len(BENCHMARK_COLS) != len(ret) - 4:
         print(f"Missing columns")
         return styled_error(f'Missing columns')
 
@@ -87,8 +96,8 @@
     print("path_in_repo: ",out_path.split("eval-queue/")[1])
     print("repo_id: ", RESULTS_REPO)
     print("repo_type: ", "dataset")
-
-    API.upload_file(
+
+    response = API.upload_file(
         path_or_fileobj=out_path,
         path_in_repo=out_path.split("eval-queue/")[1],
         repo_id=RESULTS_REPO,
@@ -192,4 +201,4 @@
     """
     return styled_message(
         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
-    ), ""
+    ), "", "", "", ""
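The updated count check relies on ret carrying exactly the four metadata keys alongside one score per benchmark column, hence len(ret) - 4. A small illustrative variant (hypothetical helper, not in the repo) would report which benchmark columns are missing instead of only counting them:

# Illustrative restatement of the validation invariant above: after the
# merge loop, ret holds the 4 metadata keys plus one score per benchmark,
# so a complete submission satisfies len(ret) - 4 == len(BENCHMARK_COLS).
META_KEYS = {'eval_name', 'precision', 'hf_model_id', 'contact_email'}

def missing_benchmarks(ret: dict, benchmark_cols: list[str]) -> set[str]:
    """Return the benchmark columns absent from a parsed submission."""
    return set(benchmark_cols) - (set(ret) - META_KEYS)

# Example: one benchmark score missing from the uploaded json.
ret = {'eval_name': 'demo', 'precision': 'bfloat16',
       'hf_model_id': 'org/model', 'contact_email': 'user@example.com',
       'agree_cs': 0.7, 'anli_cs': 0.5}
print(missing_benchmarks(ret, ['agree_cs', 'anli_cs', 'arc_challenge_cs']))
# -> {'arc_challenge_cs'}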