eduagarcia committed on
Commit
f976f1c
·
1 Parent(s): 5639a81

Add NPM field

Browse files
src/display/utils.py CHANGED
@@ -51,7 +51,7 @@ for task in Tasks:
51
  auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
52
  auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
53
  auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
54
- auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", True)])
55
  auto_eval_column_dict.append(["merged", ColumnContent, ColumnContent("Merged", "bool", False)])
56
  auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
57
  auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
@@ -107,11 +107,15 @@ baseline_row = {
107
  }
108
 
109
  baseline_list = []
 
110
  for task in Tasks:
111
  baseline_row[task.value.col_name] = task.value.baseline
112
- if task.value.baseline is not None and (isinstance(task.value.baseline, float) or isinstance(task.value.baseline, int)):
113
- baseline_list.append(task.value.baseline)
 
 
114
  baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
 
115
 
116
  #if GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS:
117
  baseline_row["🤗 Leaderboard Average"] = None
@@ -151,11 +155,16 @@ human_baseline_row = {
151
  }
152
 
153
  baseline_list = []
 
154
  for task in Tasks:
155
  human_baseline_row[task.value.col_name] = task.value.human_baseline
156
- if task.value.human_baseline is not None and (isinstance(task.value.baseline, float) or isinstance(task.value.baseline, int)):
157
- baseline_list.append(task.value.human_baseline)
 
 
 
158
  human_baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
 
159
  #if GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS:
160
  human_baseline_row["🤗 Leaderboard Average"] = None
161
 
 
51
  auto_eval_column_dict.append(["model_type", ColumnContent, ColumnContent("Type", "str", False)])
52
  auto_eval_column_dict.append(["architecture", ColumnContent, ColumnContent("Architecture", "str", False)])
53
  auto_eval_column_dict.append(["weight_type", ColumnContent, ColumnContent("Weight type", "str", False, True)])
54
+ auto_eval_column_dict.append(["precision", ColumnContent, ColumnContent("Precision", "str", False)])
55
  auto_eval_column_dict.append(["merged", ColumnContent, ColumnContent("Merged", "bool", False)])
56
  auto_eval_column_dict.append(["license", ColumnContent, ColumnContent("Hub License", "str", False)])
57
  auto_eval_column_dict.append(["params", ColumnContent, ColumnContent("#Params (B)", "number", False)])
 
107
  }
108
 
109
  baseline_list = []
110
+ npm = []
111
  for task in Tasks:
112
  baseline_row[task.value.col_name] = task.value.baseline
113
+ res = task.value.baseline
114
+ if res is not None and (isinstance(res, float) or isinstance(res, int)):
115
+ baseline_list.append(res)
116
+ npm.append((res - task.value.baseline) / (100 - task.value.baseline))
117
  baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
118
+ baseline_row[AutoEvalColumn.npm.name] = round(sum(npm) / len(npm), 2)
119
 
120
  #if GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS:
121
  baseline_row["🤗 Leaderboard Average"] = None
 
155
  }
156
 
157
  baseline_list = []
158
+ npm = []
159
  for task in Tasks:
160
  human_baseline_row[task.value.col_name] = task.value.human_baseline
161
+ res = task.value.human_baseline
162
+ if res is None or not (isinstance(res, float) or isinstance(res, int)):
163
+ res = 95.0
164
+ baseline_list.append(res)
165
+ npm.append((res - task.value.baseline) / (100 - task.value.baseline))
166
  human_baseline_row[AutoEvalColumn.average.name] = round(sum(baseline_list) / len(baseline_list), 2)
167
+ human_baseline_row[AutoEvalColumn.npm.name] = round(sum(npm) / len(npm), 2)
168
  #if GET_ORIGINAL_HF_LEADERBOARD_EVAL_RESULTS:
169
  human_baseline_row["🤗 Leaderboard Average"] = None
170
 
src/leaderboard/read_evals.py CHANGED
@@ -166,8 +166,8 @@ class EvalResult:
166
  continue
167
  average.append(res)
168
  npm.append((res-task.value.baseline)*100.0 / (100.0-task.value.baseline))
169
- average = sum(average)/len(average)
170
- npm = sum(npm)/len(npm)
171
 
172
  data_dict = {
173
  "eval_name": self.eval_name, # not a column, just a save name,
 
166
  continue
167
  average.append(res)
168
  npm.append((res-task.value.baseline)*100.0 / (100.0-task.value.baseline))
169
+ average = round(sum(average)/len(average), 2)
170
+ npm = round(sum(npm)/len(npm), 2)
171
 
172
  data_dict = {
173
  "eval_name": self.eval_name, # not a column, just a save name,