natolambert committed on
Commit
521165c
·
1 Parent(s): 8ac8bdc
Files changed (2) hide show
  1. app.py +6 -6
  2. src/utils.py +2 -2
app.py CHANGED
@@ -52,7 +52,7 @@ def avg_over_rewardbench(dataframe_core, dataframe_prefs):
52
  subset_cols = [col for col in new_df.columns if col in sub_subsets]
53
  sub_data = new_df[subset_cols].values # take the relevant column values
54
  sub_counts = [example_counts[s] for s in sub_subsets] # take the example counts
55
- new_df[subset] = np.round(np.average(sub_data, axis=1, weights=sub_counts), 2) # take the weighted average
56
  # new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
57
 
58
  data_cols = list(subset_mapping.keys())
@@ -65,7 +65,7 @@ def avg_over_rewardbench(dataframe_core, dataframe_prefs):
65
  pref_data = dataframe_prefs[pref_columns].values
66
 
67
  # add column test sets knowing the rows are not identical, take superset
68
- dataframe_prefs["Prior Sets"] = np.round(np.nanmean(pref_data, axis=1), 2)
69
 
70
  # add column Test Sets empty to new_df
71
  new_df["Prior Sets"] = np.nan
@@ -83,7 +83,7 @@ def avg_over_rewardbench(dataframe_core, dataframe_prefs):
83
 
84
  # add total average
85
  data_cols += ["Prior Sets"]
86
- new_df["average"] = np.round(np.nanmean(new_df[data_cols].values, axis=1), 2)
87
 
88
  # make average third column
89
  keep_columns = ["model", "model_type", "average"] + data_cols
@@ -207,15 +207,15 @@ def regex_table(dataframe, regex, filter_button):
207
  with gr.Blocks(css=custom_css) as app:
208
  # create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
209
  with gr.Row():
210
- with gr.Column(scale=3):
 
 
211
  # search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
212
  # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
213
  # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
214
  gr.Markdown("""
215
  ![](file/src/logo.png)
216
  """)
217
- with gr.Column(scale=6):
218
- gr.Markdown(TOP_TEXT)
219
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
220
  with gr.TabItem("🏆 RewardBench Leaderboard"):
221
  with gr.Row():
 
52
  subset_cols = [col for col in new_df.columns if col in sub_subsets]
53
  sub_data = new_df[subset_cols].values # take the relevant column values
54
  sub_counts = [example_counts[s] for s in sub_subsets] # take the example counts
55
+ new_df[subset] = np.round(np.average(sub_data, axis=1, weights=sub_counts), 1) # take the weighted average
56
  # new_df[subset] = np.round(np.nanmean(new_df[subset_cols].values, axis=1), 2)
57
 
58
  data_cols = list(subset_mapping.keys())
 
65
  pref_data = dataframe_prefs[pref_columns].values
66
 
67
  # add column test sets knowing the rows are not identical, take superset
68
+ dataframe_prefs["Prior Sets"] = np.round(np.nanmean(pref_data, axis=1), 1)
69
 
70
  # add column Test Sets empty to new_df
71
  new_df["Prior Sets"] = np.nan
 
83
 
84
  # add total average
85
  data_cols += ["Prior Sets"]
86
+ new_df["average"] = np.round(np.nanmean(new_df[data_cols].values, axis=1), 1)
87
 
88
  # make average third column
89
  keep_columns = ["model", "model_type", "average"] + data_cols
 
207
  with gr.Blocks(css=custom_css) as app:
208
  # create tabs for the app, moving the current table to one titled "rewardbench" and the benchmark_text to a tab called "About"
209
  with gr.Row():
210
+ with gr.Column(scale=6):
211
+ gr.Markdown(TOP_TEXT)
212
+ with gr.Column(scale=4):
213
  # search = gr.Textbox(label="Model Search (delimit with , )", placeholder="Regex search for a model")
214
  # filter_button = gr.Checkbox(label="Include AI2 training runs (or type ai2 above).", interactive=True)
215
  # img = gr.Image(value="https://private-user-images.githubusercontent.com/10695622/310698241-24ed272a-0844-451f-b414-fde57478703e.png", width=500)
216
  gr.Markdown("""
217
  ![](file/src/logo.png)
218
  """)
 
 
219
  with gr.Tabs(elem_classes="tab-buttons") as tabs:
220
  with gr.TabItem("🏆 RewardBench Leaderboard"):
221
  with gr.Row():
src/utils.py CHANGED
@@ -97,8 +97,8 @@ def load_all_data(data_repo, subdir:str, subsubsets=False): # use HF api to p
97
  cols.remove("pku_safer")
98
 
99
  # round
100
- df[cols] = df[cols].round(2)
101
- avg = np.nanmean(df[cols].values,axis=1).round(2)
102
  # add average column
103
  df["average"] = avg
104
 
 
97
  cols.remove("pku_safer")
98
 
99
  # round
100
+ df[cols] = (df[cols]*100).round(1)
101
+ avg = np.nanmean(df[cols].values,axis=1).round(1)
102
  # add average column
103
  df["average"] = avg
104