davidadamczyk committed
Commit ebe77ac · 1 Parent(s): 8fae53c

Add precision and hf_model_id

Files changed (4)
  1. app.py +8 -3
  2. src/display/utils.py +2 -0
  3. src/populate.py +4 -3
  4. src/submission/submit.py +17 -8
app.py CHANGED
@@ -304,7 +304,9 @@ with demo:
     with gr.Row():
         with gr.Column():
             model_name_textbox = gr.Textbox(label="Model name")
-
+            precision = gr.Radio(["bfloat16", "float16", "4bit"], label="Precision", info="What precision are you using for inference?")
+            hf_model_id = gr.Textbox(label="Huggingface Model ID")
+            contact_email = gr.Textbox(label="E-Mail")
             file_output = gr.File()
             upload_button = gr.UploadButton("Upload json", file_types=['.json'])
             upload_button.upload(validate_upload, upload_button, file_output)
@@ -342,9 +344,12 @@ with demo:
             fn = add_new_eval,
             inputs = [
                 model_name_textbox,
-                upload_button
+                upload_button,
+                precision,
+                hf_model_id,
+                contact_email
             ],
-            outputs = [submission_result, model_name_textbox],
+            outputs = [submission_result, model_name_textbox, precision, hf_model_id, contact_email],
         )
 
     with gr.Row():
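
The three new components are wired into the existing submit handler simply by listing them in inputs, so add_new_eval receives their current values as extra positional arguments; listing the same components in outputs lets the handler reset the form by returning one value per component. A minimal, self-contained sketch of that Gradio pattern, assuming a plain Button.click hookup (the layout and handler body here are simplified illustrations, not the repo's actual code):

import gradio as gr

def add_new_eval(eval_name, upload, precision, hf_model_id, contact_email):
    # `upload` is the path of the uploaded JSON file provided by the UploadButton.
    # Return one value per output component: a status message plus empty strings
    # that clear the form fields after a successful submission.
    return f"Queued {eval_name} ({precision})", "", "", "", ""

with gr.Blocks() as demo:
    model_name_textbox = gr.Textbox(label="Model name")
    precision = gr.Radio(["bfloat16", "float16", "4bit"], label="Precision")
    hf_model_id = gr.Textbox(label="Huggingface Model ID")
    contact_email = gr.Textbox(label="E-Mail")
    upload_button = gr.UploadButton("Upload json", file_types=[".json"])
    submission_result = gr.Markdown()
    submit_button = gr.Button("Submit")
    submit_button.click(
        fn=add_new_eval,
        inputs=[model_name_textbox, upload_button, precision, hf_model_id, contact_email],
        outputs=[submission_result, model_name_textbox, precision, hf_model_id, contact_email],
    )

demo.launch()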
src/display/utils.py CHANGED
@@ -48,6 +48,8 @@ auto_eval_column_dict.append(["dummy", ColumnContent, ColumnContent("model_name_
 """
 
 auto_eval_column_dict.append(["eval_name", ColumnContent, ColumnContent("eval_name", "str", True, never_hidden=True)])
+auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("precision", "str", True)])
+auto_eval_column_dict.append(["hf_model_id", ColumnContent, ColumnContent("hf_model_id", "str", True)])
 auto_eval_column_dict.append(["agree_cs", ColumnContent, ColumnContent("agree_cs", "number", True)])
 auto_eval_column_dict.append(["anli_cs", ColumnContent, ColumnContent("anli_cs", "number", True)])
 auto_eval_column_dict.append(["arc_challenge_cs", ColumnContent, ColumnContent("arc_challenge_cs", "number", True)])
src/populate.py CHANGED
@@ -1,6 +1,6 @@
 import json
 import os
-
+import numpy as np
 import pandas as pd
 
 from src.display.formatting import has_no_nan_values, make_clickable_model
@@ -14,16 +14,17 @@ def get_leaderboard_df(results_path: str, requests_path: str, cols: list, benchm
     df = pd.DataFrame.from_records(raw_data)
     #df = df.sort_values(by=[AutoEvalColumn.average.name], ascending=False)
     df = df[cols].round(decimals=2)
-
+    df.replace(r'\s+', np.nan, regex=True)
     # filter out if any of the benchmarks have not been produced
     df = df[has_no_nan_values(df, benchmark_cols)]
+
     return raw_data, df
 
 
 def get_evaluation_queue_df(save_path: str, cols: list) -> list[pd.DataFrame]:
     entries = [entry for entry in os.listdir(save_path) if not entry.startswith(".")]
     all_evals = []
-
+
     for entry in entries:
         if ".json" in entry:
             file_path = os.path.join(save_path, entry)
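
The added df.replace(r'\s+', np.nan, regex=True) is intended to turn whitespace-only cells into NaN so that the has_no_nan_values filter on the next line drops incomplete rows. Worth noting: DataFrame.replace returns a new frame rather than mutating df, so the result has to be assigned back (or inplace=True passed) for the filter to see the NaNs. A small standalone sketch of the intended idiom, using an anchored pattern and made-up data:

import numpy as np
import pandas as pd

df = pd.DataFrame({"agree_cs": [0.71, "  "], "anli_cs": [0.42, 0.40]})

# Turn cells that contain only whitespace into NaN, keeping the result.
df = df.replace(r"^\s*$", np.nan, regex=True)

# Rows with NaN in any benchmark column can then be filtered out.
df = df[df.notna().all(axis=1)]
print(df)  # only the first, complete row remains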
src/submission/submit.py CHANGED
@@ -24,8 +24,12 @@ USERS_TO_SUBMISSION_DATES = None
 
 def add_new_eval(
     eval_name: str,
-    upload: object
+    upload: object,
+    precision: str,
+    hf_model_id: str,
+    contact_email: str
 ):
+
     with open(upload, mode="r") as f:
         data = json.load(f)
 
@@ -33,21 +37,26 @@ def add_new_eval(
 
     acc_keys = ['exact_match,none', 'exact_match,flexible-extract', 'exact_match,strict-match']
 
-    ret = {"eval_name": eval_name}
+    ret = {
+        'eval_name': eval_name,
+        'precision': precision,
+        'hf_model_id': hf_model_id,
+        'contact_email': contact_email
+    }
+
     for k, v in results.items():
         for acc_k in acc_keys:
             if acc_k in v and k in BENCHMARK_COLS:
                 ret[k] = v[acc_k]
-
     #validation
     for k,v in ret.items():
-        if k == "eval_name":
+        if k in ['eval_name', 'precision', 'hf_model_id', 'contact_email']:
            continue
         if k not in BENCHMARK_COLS:
            print(f"Missing: {k}")
            return styled_error(f'Missing: {k}')
 
-    if len(BENCHMARK_COLS) != len(ret) - 1:
+    if len(BENCHMARK_COLS) != len(ret) - 4:
        print(f"Missing columns")
        return styled_error(f'Missing columns')
 
@@ -87,8 +96,8 @@ def add_new_eval(
     print("path_in_repo: ",out_path.split("eval-queue/")[1])
     print("repo_id: ", RESULTS_REPO)
     print("repo_type: ", "dataset")
-
-    API.upload_file(
+
+    response = API.upload_file(
         path_or_fileobj=out_path,
         path_in_repo=out_path.split("eval-queue/")[1],
         repo_id=RESULTS_REPO,
@@ -192,4 +201,4 @@ def add_new_eval(
     """
     return styled_message(
         "Your request has been submitted to the evaluation queue!\nPlease wait for up to an hour for the model to show in the PENDING list."
-    ), ""
+    ), "", "", "", ""