Spaces:
Running
Running
Jae-Won Chung
committed on
Commit
·
8b30258
1
Parent(s):
cdc3f99
Default for the app
Browse files
- LEADERBOARD.md +1 -1
- app.py +17 -10
- data/2023-06-17/schema.yaml +1 -1
LEADERBOARD.md
CHANGED
@@ -10,7 +10,7 @@ That is, when asked the same thing, different models answer in different lengths
|
|
10 |
|
11 |
- `gpu`: NVIDIA GPU model name. Note that NLP evaluation was only run once on our A40 GPUs, so this column only changes system-level measurements like latency and energy.
|
12 |
- `task`: Name of the task. See *Tasks* below for details.
|
13 |
-
- `
|
14 |
- `energy` (J): The average energy consumed by the model to generate a response.
|
15 |
- `nlp_average`: The arithmetic average of the NLP evaluation metrics we obtained. See *NLP evaluation metrics* below for details.
|
16 |
- `throughput` (token/s): The average number of tokens generated per second.
|
|
|
10 |
|
11 |
- `gpu`: NVIDIA GPU model name. Note that NLP evaluation was only run once on our A40 GPUs, so this column only changes system-level measurements like latency and energy.
|
12 |
- `task`: Name of the task. See *Tasks* below for details.
|
13 |
+
- `energy_eff`: Our definition of energy efficiency: Average NLP evaluation metric attained per Joule of energy.
|
14 |
- `energy` (J): The average energy consumed by the model to generate a response.
|
15 |
- `nlp_average`: The arithmetic average of the NLP evaluation metrics we obtained. See *NLP evaluation metrics* below for details.
|
16 |
- `throughput` (token/s): The average number of tokens generated per second.
|
app.py
CHANGED
@@ -35,7 +35,7 @@ class TableManager:
|
|
35 |
df["model"] = df["model"].apply(format_model_link)
|
36 |
|
37 |
# Sort by our 'energy efficiency' score.
|
38 |
-
df = df.sort_values(by="
|
39 |
|
40 |
# The full table where all the data are.
|
41 |
self.full_df = df
|
@@ -71,24 +71,24 @@ class TableManager:
|
|
71 |
if res_df.empty:
|
72 |
raise ValueError(f"No benchmark CSV files were read from {data_dir=}.")
|
73 |
|
74 |
-
df = pd.merge(res_df, df_score, on=["model"])
|
75 |
|
76 |
# Energy efficiency is defined as the amount of average NLP performance
|
77 |
# the model gets per Joule of energy.
|
78 |
-
df["
|
79 |
|
80 |
# Order columns.
|
81 |
columns = df.columns.to_list()
|
82 |
cols_to_order = ["model"]
|
83 |
cols_to_order.extend(self.schema.keys())
|
84 |
-
cols_to_order.extend(["
|
85 |
columns = cols_to_order + [col for col in columns if col not in cols_to_order]
|
86 |
df = df[columns]
|
87 |
|
88 |
# Delete rows with *any* NaN values.
|
89 |
df = df.dropna()
|
90 |
|
91 |
-
return df
|
92 |
|
93 |
def _format_msg(self, text: str) -> str:
|
94 |
"""Formats into HTML that prints in Monospace font."""
|
@@ -131,20 +131,27 @@ class TableManager:
|
|
131 |
return self.cur_df, self._format_msg(f"{verb} column '{column_name}'.")
|
132 |
|
133 |
def get_dropdown(self):
|
134 |
-
columns = self.full_df.columns.tolist()[1:]
|
135 |
return [
|
136 |
gr.Dropdown(value="gpu", choices=columns, label="X"),
|
137 |
gr.Dropdown(value="nlp_average", choices=columns, label="Y"),
|
138 |
-
gr.Dropdown(value="
|
139 |
]
|
140 |
|
141 |
def update_dropdown(self):
|
142 |
columns = self.full_df.columns.tolist()[1:]
|
143 |
-
|
144 |
-
|
|
|
|
|
|
|
145 |
|
146 |
def set_filter_get_df(self, *filters):
|
147 |
"""Set the current set of filters and return the filtered DataFrame."""
|
|
|
|
|
|
|
|
|
148 |
index = np.full(len(self.full_df), True)
|
149 |
for setup, choice in zip(self.schema, filters):
|
150 |
index = index & self.full_df[setup].isin(choice)
|
@@ -378,6 +385,6 @@ with block:
|
|
378 |
gr.Markdown(open("LEADERBOARD.md").read())
|
379 |
|
380 |
# Load the table on page load.
|
381 |
-
block.load(
|
382 |
|
383 |
block.launch()
|
|
|
35 |
df["model"] = df["model"].apply(format_model_link)
|
36 |
|
37 |
# Sort by our 'energy efficiency' score.
|
38 |
+
df = df.sort_values(by="energy_eff", ascending=False)
|
39 |
|
40 |
# The full table where all the data are.
|
41 |
self.full_df = df
|
|
|
71 |
if res_df.empty:
|
72 |
raise ValueError(f"No benchmark CSV files were read from {data_dir=}.")
|
73 |
|
74 |
+
df = pd.merge(res_df, df_score, on=["model"]).round(2)
|
75 |
|
76 |
# Energy efficiency is defined as the amount of average NLP performance
|
77 |
# the model gets per Joule of energy.
|
78 |
+
df["energy_eff"] = (df["nlp_average"] / df["energy"]).round(4)
|
79 |
|
80 |
# Order columns.
|
81 |
columns = df.columns.to_list()
|
82 |
cols_to_order = ["model"]
|
83 |
cols_to_order.extend(self.schema.keys())
|
84 |
+
cols_to_order.extend(["energy_eff", "energy", "nlp_average"])
|
85 |
columns = cols_to_order + [col for col in columns if col not in cols_to_order]
|
86 |
df = df[columns]
|
87 |
|
88 |
# Delete rows with *any* NaN values.
|
89 |
df = df.dropna()
|
90 |
|
91 |
+
return df
|
92 |
|
93 |
def _format_msg(self, text: str) -> str:
|
94 |
"""Formats into HTML that prints in Monospace font."""
|
|
|
131 |
return self.cur_df, self._format_msg(f"{verb} column '{column_name}'.")
|
132 |
|
133 |
def get_dropdown(self):
|
134 |
+
columns = self.full_df.columns.tolist()[1:]
|
135 |
return [
|
136 |
gr.Dropdown(value="gpu", choices=columns, label="X"),
|
137 |
gr.Dropdown(value="nlp_average", choices=columns, label="Y"),
|
138 |
+
gr.Dropdown(value="energy_eff", choices=["None", *columns], label="Z (optional)"),
|
139 |
]
|
140 |
|
141 |
def update_dropdown(self):
|
142 |
columns = self.full_df.columns.tolist()[1:]
|
143 |
+
return [
|
144 |
+
gr.Dropdown.update(choices=columns),
|
145 |
+
gr.Dropdown.update(choices=columns),
|
146 |
+
gr.Dropdown.update(choices=["None", *columns])),
|
147 |
+
]
|
148 |
|
149 |
def set_filter_get_df(self, *filters):
|
150 |
"""Set the current set of filters and return the filtered DataFrame."""
|
151 |
+
# If the filter is empty, we default to the first choice for each key.
|
152 |
+
if not filters:
|
153 |
+
filters = [choices[0] for choices in self.schema.values()]
|
154 |
+
|
155 |
index = np.full(len(self.full_df), True)
|
156 |
for setup, choice in zip(self.schema, filters):
|
157 |
index = index & self.full_df[setup].isin(choice)
|
|
|
385 |
gr.Markdown(open("LEADERBOARD.md").read())
|
386 |
|
387 |
# Load the table on page load.
|
388 |
+
block.load(TableManager.set_filter_get_df, input=tbm, outputs=dataframe)
|
389 |
|
390 |
block.launch()
|
data/2023-06-17/schema.yaml
CHANGED
@@ -1,2 +1,2 @@
|
|
1 |
-
gpu: ["
|
2 |
task: ["chat", "chat-concise", "instruct", "instruct-concise"]
|
|
|
1 |
+
gpu: ["A100", "A40"]
|
2 |
task: ["chat", "chat-concise", "instruct", "instruct-concise"]
|