Jae-Won Chung committed on
Commit
8b30258
·
1 Parent(s): cdc3f99

Default for the app

Browse files
Files changed (3) hide show
  1. LEADERBOARD.md +1 -1
  2. app.py +17 -10
  3. data/2023-06-17/schema.yaml +1 -1
LEADERBOARD.md CHANGED
@@ -10,7 +10,7 @@ That is, when asked the same thing, different models answer in different lengths
10
 
11
  - `gpu`: NVIDIA GPU model name. Note that NLP evaluation was only run once on our A40 GPUs, so this column only changes system-level measurements like latency and energy.
12
  - `task`: Name of the task. See *Tasks* below for details.
13
- - `energy_efficiency`: The average NLP evaluation metric attained per Joule of energy.
14
  - `energy` (J): The average energy consumed by the model to generate a response.
15
  - `nlp_average`: The arithmetic average of the NLP evaluation metrics we obtained. See *NLP evaluation metrics* below for details.
16
  - `throughput` (token/s): The average number of tokens generated per second.
 
10
 
11
  - `gpu`: NVIDIA GPU model name. Note that NLP evaluation was only run once on our A40 GPUs, so this column only changes system-level measurements like latency and energy.
12
  - `task`: Name of the task. See *Tasks* below for details.
13
+ - `energy_eff`: Our definition of energy efficiency: Average NLP evaluation metric attained per Joule of energy.
14
  - `energy` (J): The average energy consumed by the model to generate a response.
15
  - `nlp_average`: The arithmetic average of the NLP evaluation metrics we obtained. See *NLP evaluation metrics* below for details.
16
  - `throughput` (token/s): The average number of tokens generated per second.
app.py CHANGED
@@ -35,7 +35,7 @@ class TableManager:
35
  df["model"] = df["model"].apply(format_model_link)
36
 
37
  # Sort by our 'energy efficiency' score.
38
- df = df.sort_values(by="energy_efficiency", ascending=True)
39
 
40
  # The full table where all the data are.
41
  self.full_df = df
@@ -71,24 +71,24 @@ class TableManager:
71
  if res_df.empty:
72
  raise ValueError(f"No benchmark CSV files were read from {data_dir=}.")
73
 
74
- df = pd.merge(res_df, df_score, on=["model"])
75
 
76
  # Energy efficiency is defined as the amount of average NLP performance
77
  # the model gets per Joule of energy.
78
- df["energy_efficiency"] = df["nlp_average"] / df["energy"]
79
 
80
  # Order columns.
81
  columns = df.columns.to_list()
82
  cols_to_order = ["model"]
83
  cols_to_order.extend(self.schema.keys())
84
- cols_to_order.extend(["energy_efficiency", "energy", "nlp_average"])
85
  columns = cols_to_order + [col for col in columns if col not in cols_to_order]
86
  df = df[columns]
87
 
88
  # Delete rows with *any* NaN values.
89
  df = df.dropna()
90
 
91
- return df.round(2)
92
 
93
  def _format_msg(self, text: str) -> str:
94
  """Formats into HTML that prints in Monospace font."""
@@ -131,20 +131,27 @@ class TableManager:
131
  return self.cur_df, self._format_msg(f"{verb} column '{column_name}'.")
132
 
133
  def get_dropdown(self):
134
- columns = self.full_df.columns.tolist()[1:] # include gpu and task in the dropdown
135
  return [
136
  gr.Dropdown(value="gpu", choices=columns, label="X"),
137
  gr.Dropdown(value="nlp_average", choices=columns, label="Y"),
138
- gr.Dropdown(value="energy_efficiency", choices=columns, label="Z (optional)"),
139
  ]
140
 
141
  def update_dropdown(self):
142
  columns = self.full_df.columns.tolist()[1:]
143
- dropdown_update = gr.Dropdown.update(choices=columns)
144
- return [dropdown_update] * 3
 
 
 
145
 
146
  def set_filter_get_df(self, *filters):
147
  """Set the current set of filters and return the filtered DataFrame."""
 
 
 
 
148
  index = np.full(len(self.full_df), True)
149
  for setup, choice in zip(self.schema, filters):
150
  index = index & self.full_df[setup].isin(choice)
@@ -378,6 +385,6 @@ with block:
378
  gr.Markdown(open("LEADERBOARD.md").read())
379
 
380
  # Load the table on page load.
381
- block.load(lambda tbm: tbm.full_df, inputs=tbm, outputs=dataframe)
382
 
383
  block.launch()
 
35
  df["model"] = df["model"].apply(format_model_link)
36
 
37
  # Sort by our 'energy efficiency' score.
38
+ df = df.sort_values(by="energy_eff", ascending=False)
39
 
40
  # The full table where all the data are.
41
  self.full_df = df
 
71
  if res_df.empty:
72
  raise ValueError(f"No benchmark CSV files were read from {data_dir=}.")
73
 
74
+ df = pd.merge(res_df, df_score, on=["model"]).round(2)
75
 
76
  # Energy efficiency is defined as the amount of average NLP performance
77
  # the model gets per Joule of energy.
78
+ df["energy_eff"] = (df["nlp_average"] / df["energy"]).round(4)
79
 
80
  # Order columns.
81
  columns = df.columns.to_list()
82
  cols_to_order = ["model"]
83
  cols_to_order.extend(self.schema.keys())
84
+ cols_to_order.extend(["energy_eff", "energy", "nlp_average"])
85
  columns = cols_to_order + [col for col in columns if col not in cols_to_order]
86
  df = df[columns]
87
 
88
  # Delete rows with *any* NaN values.
89
  df = df.dropna()
90
 
91
+ return df
92
 
93
  def _format_msg(self, text: str) -> str:
94
  """Formats into HTML that prints in Monospace font."""
 
131
  return self.cur_df, self._format_msg(f"{verb} column '{column_name}'.")
132
 
133
  def get_dropdown(self):
134
+ columns = self.full_df.columns.tolist()[1:]
135
  return [
136
  gr.Dropdown(value="gpu", choices=columns, label="X"),
137
  gr.Dropdown(value="nlp_average", choices=columns, label="Y"),
138
+ gr.Dropdown(value="energy_eff", choices=["None", *columns], label="Z (optional)"),
139
  ]
140
 
141
  def update_dropdown(self):
142
  columns = self.full_df.columns.tolist()[1:]
143
+ return [
144
+ gr.Dropdown.update(choices=columns),
145
+ gr.Dropdown.update(choices=columns),
146
+ gr.Dropdown.update(choices=["None", *columns]),
147
+ ]
148
 
149
  def set_filter_get_df(self, *filters):
150
  """Set the current set of filters and return the filtered DataFrame."""
151
+ # If the filter is empty, we default to the first choice for each key.
152
+ if not filters:
153
+ filters = [choices[:1] for choices in self.schema.values()]
154
+
155
  index = np.full(len(self.full_df), True)
156
  for setup, choice in zip(self.schema, filters):
157
  index = index & self.full_df[setup].isin(choice)
 
385
  gr.Markdown(open("LEADERBOARD.md").read())
386
 
387
  # Load the table on page load.
388
+ block.load(TableManager.set_filter_get_df, inputs=tbm, outputs=dataframe)
389
 
390
  block.launch()
data/2023-06-17/schema.yaml CHANGED
@@ -1,2 +1,2 @@
1
- gpu: ["A40", "A100"]
2
  task: ["chat", "chat-concise", "instruct", "instruct-concise"]
 
1
+ gpu: ["A100", "A40"]
2
  task: ["chat", "chat-concise", "instruct", "instruct-concise"]