Spaces:
Running
Running
Commit
·
134a499
1
Parent(s):
04a3faa
updated the llm-perf
Browse files
- .gitignore +2 -0
- app.py +131 -124
- requirements.txt +4 -5
- src/utils.py +5 -24
.gitignore
CHANGED
@@ -3,3 +3,5 @@ __pycache__/
|
|
3 |
.ipynb_checkpoints
|
4 |
*ipynb
|
5 |
.vscode/
|
|
|
|
|
|
3 |
.ipynb_checkpoints
|
4 |
*ipynb
|
5 |
.vscode/
|
6 |
+
|
7 |
+
dataset/
|
app.py
CHANGED
@@ -1,88 +1,98 @@
|
|
1 |
import os
|
|
|
2 |
import gradio as gr
|
3 |
import pandas as pd
|
4 |
import plotly.express as px
|
5 |
-
from
|
|
|
6 |
|
|
|
7 |
from src.assets.css_html_js import custom_css
|
8 |
from src.assets.text_content import (
|
9 |
TITLE,
|
10 |
-
INTRODUCTION_TEXT,
|
11 |
ABOUT_TEXT,
|
|
|
12 |
EXAMPLE_CONFIG_TEXT,
|
13 |
CITATION_BUTTON_LABEL,
|
14 |
CITATION_BUTTON_TEXT,
|
15 |
)
|
16 |
-
from src.utils import (
|
17 |
-
restart_space,
|
18 |
-
load_dataset_repo,
|
19 |
-
process_model_name,
|
20 |
-
process_model_type,
|
21 |
-
)
|
22 |
|
23 |
-
|
24 |
-
HARDWARES_EMOJIS = ["π₯οΈ", "π»"]
|
25 |
-
LLM_PERF_LEADERBOARD_REPO = "optimum/llm-perf-leaderboard"
|
26 |
LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
|
27 |
-
|
28 |
-
|
29 |
ALL_COLUMNS_MAPPING = {
|
|
|
|
|
|
|
|
|
|
|
30 |
"backend.name": "Backend π",
|
31 |
"backend.torch_dtype": "Dtype π₯",
|
32 |
"optimizations": "Optimizations π οΈ",
|
33 |
"quantization": "Quantization ποΈ",
|
34 |
-
#
|
35 |
-
"
|
36 |
-
"
|
37 |
-
#
|
38 |
-
"
|
39 |
-
"generate.
|
|
|
|
|
|
|
|
|
|
|
40 |
"generate.energy_consumption(tokens/kWh)": "Energy (tokens/kWh) β¬οΈ",
|
41 |
-
|
42 |
-
|
43 |
-
"best_scored_model": "Best Scored LLM π",
|
44 |
}
|
|
|
|
|
|
|
45 |
ALL_COLUMNS_DATATYPES = [
|
|
|
|
|
|
|
|
|
|
|
46 |
"str",
|
47 |
"str",
|
48 |
"str",
|
49 |
"str",
|
50 |
-
#
|
51 |
-
"
|
52 |
-
"
|
53 |
-
|
|
|
|
|
|
|
|
|
54 |
"number",
|
55 |
"number",
|
56 |
"number",
|
57 |
-
"str",
|
58 |
-
#
|
59 |
-
"markdown",
|
60 |
-
]
|
61 |
-
NO_DUPLICATES_COLUMNS = [
|
62 |
-
"backend.name",
|
63 |
-
"backend.torch_dtype",
|
64 |
-
"optimizations",
|
65 |
-
"quantization",
|
66 |
-
#
|
67 |
-
"weight_class",
|
68 |
-
"model_type",
|
69 |
]
|
70 |
-
SORTING_COLUMN = ["best_score", "generate.latency(s)", "generate.peak_memory(MB)"]
|
71 |
-
SORTING_ASCENDING = [False, True, True]
|
72 |
|
73 |
-
llm_perf_dataset_repo = load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN)
|
74 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
75 |
|
76 |
-
def get_benchmark_df(benchmark="Succeeded-1xA100-80GB"):
|
77 |
-
if llm_perf_dataset_repo:
|
78 |
-
llm_perf_dataset_repo.git_pull()
|
79 |
-
# load data
|
80 |
-
benchmark_df = pd.read_csv(f"./llm-perf-dataset/reports/{benchmark}.csv")
|
81 |
-
clusters_df = pd.read_csv("./llm-perf-dataset/Clustered-Open-LLM-Leaderboard.csv")
|
82 |
# merge on model
|
83 |
-
merged_df =
|
84 |
-
clusters_df, left_on="model", right_on="best_scored_model"
|
85 |
-
)
|
86 |
# transpose energy consumption
|
87 |
merged_df["generate.energy_consumption(tokens/kWh)"] = (
|
88 |
1 / merged_df["generate.energy_consumption(kWh/token)"].fillna(1)
|
@@ -91,38 +101,44 @@ def get_benchmark_df(benchmark="Succeeded-1xA100-80GB"):
|
|
91 |
merged_df.loc[
|
92 |
merged_df["generate.energy_consumption(tokens/kWh)"] == 1,
|
93 |
"generate.energy_consumption(tokens/kWh)",
|
94 |
-
] =
|
95 |
-
# add optimizations
|
96 |
-
merged_df["optimizations"] = merged_df[
|
97 |
-
|
|
|
|
|
|
|
|
|
|
|
98 |
)
|
99 |
# add quantization scheme
|
100 |
-
merged_df["quantization"] = merged_df["backend.
|
101 |
lambda x: "BnB.4bit" if x == "bnb" else ("GPTQ.4bit" if x == "gptq" else "None")
|
102 |
)
|
103 |
-
#
|
|
|
|
|
|
|
|
|
104 |
merged_df.sort_values(by=SORTING_COLUMN, ascending=SORTING_ASCENDING, inplace=True)
|
105 |
-
#
|
106 |
-
merged_df.
|
|
|
|
|
|
|
107 |
return merged_df
|
108 |
|
109 |
|
110 |
def get_benchmark_table(bench_df):
|
111 |
copy_df = bench_df.copy()
|
112 |
-
# filter
|
113 |
-
copy_df = copy_df[list(ALL_COLUMNS_MAPPING.keys())]
|
114 |
-
# rename
|
115 |
-
copy_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
|
116 |
# transform
|
117 |
-
copy_df["
|
118 |
-
copy_df["
|
119 |
-
process_model_name
|
120 |
-
)
|
121 |
# process quantization
|
122 |
-
copy_df["
|
123 |
-
lambda x: f"{x['
|
124 |
if x["Quantization ποΈ"] in ["BnB.4bit", "GPTQ.4bit"]
|
125 |
-
else x["
|
126 |
axis=1,
|
127 |
)
|
128 |
return copy_df
|
@@ -130,17 +146,18 @@ def get_benchmark_table(bench_df):
|
|
130 |
|
131 |
def get_benchmark_chart(bench_df):
|
132 |
copy_df = bench_df.copy()
|
|
|
|
|
133 |
# filter latency bigger than 100s
|
134 |
-
copy_df = copy_df[copy_df["
|
135 |
-
|
136 |
-
copy_df["model_type"] = copy_df["model_type"].apply(process_model_type)
|
137 |
fig = px.scatter(
|
138 |
copy_df,
|
139 |
-
y="
|
140 |
-
x="
|
141 |
-
size="
|
142 |
-
color="
|
143 |
-
custom_data=list(ALL_COLUMNS_MAPPING.
|
144 |
color_discrete_sequence=px.colors.qualitative.Light24,
|
145 |
)
|
146 |
fig.update_layout(
|
@@ -151,17 +168,17 @@ def get_benchmark_chart(bench_df):
|
|
151 |
"xanchor": "center",
|
152 |
"yanchor": "top",
|
153 |
},
|
154 |
-
xaxis_title="Per 1000
|
155 |
-
yaxis_title="Open LLM Score (%)",
|
156 |
-
legend_title="LLM
|
157 |
width=1200,
|
158 |
height=600,
|
159 |
)
|
160 |
fig.update_traces(
|
161 |
hovertemplate="<br>".join(
|
162 |
[
|
163 |
-
f"<b>{
|
164 |
-
for i,
|
165 |
]
|
166 |
)
|
167 |
)
|
@@ -176,17 +193,17 @@ def filter_query(
|
|
176 |
quantization_scheme,
|
177 |
score,
|
178 |
memory,
|
179 |
-
|
180 |
):
|
181 |
-
raw_df = get_benchmark_df(
|
182 |
filtered_df = raw_df[
|
183 |
-
raw_df["
|
184 |
-
& raw_df["
|
185 |
-
& raw_df["
|
186 |
& (
|
187 |
pd.concat(
|
188 |
[
|
189 |
-
raw_df["
|
190 |
for optimization in optimizations
|
191 |
],
|
192 |
axis=1,
|
@@ -197,7 +214,7 @@ def filter_query(
|
|
197 |
& (
|
198 |
pd.concat(
|
199 |
[
|
200 |
-
raw_df["
|
201 |
for quantization in quantization_scheme
|
202 |
],
|
203 |
axis=1,
|
@@ -205,8 +222,8 @@ def filter_query(
|
|
205 |
if len(quantization_scheme) > 0
|
206 |
else True
|
207 |
)
|
208 |
-
& (raw_df["
|
209 |
-
& (raw_df["
|
210 |
]
|
211 |
filtered_table = get_benchmark_table(filtered_df)
|
212 |
filtered_chart = get_benchmark_chart(filtered_df)
|
@@ -222,29 +239,29 @@ with demo:
|
|
222 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="descriptive-text")
|
223 |
|
224 |
with gr.Tabs(elem_classes="leaderboard-tabs"):
|
225 |
-
|
226 |
-
|
227 |
-
|
228 |
####################### HARDWARE TABS #######################
|
229 |
-
for i, (
|
230 |
-
# dummy placeholder of the
|
231 |
-
|
232 |
-
|
233 |
-
|
|
|
234 |
# placeholder for full dataframe
|
235 |
-
|
236 |
with gr.TabItem("Leaderboard π
", id=0):
|
237 |
gr.HTML(
|
238 |
"π Scroll to the right π for additional columns.",
|
239 |
elem_id="descriptive-text",
|
240 |
)
|
241 |
# Original leaderboard table
|
242 |
-
|
243 |
-
value=get_benchmark_table(
|
244 |
headers=list(ALL_COLUMNS_MAPPING.values()),
|
245 |
datatype=ALL_COLUMNS_DATATYPES,
|
246 |
-
elem_id="
|
247 |
-
# show_label=False,
|
248 |
)
|
249 |
with gr.TabItem("Plot π", id=1):
|
250 |
gr.HTML(
|
@@ -252,13 +269,13 @@ with demo:
|
|
252 |
elem_id="descriptive-text",
|
253 |
)
|
254 |
# Original leaderboard plot
|
255 |
-
|
256 |
-
value=get_benchmark_chart(
|
257 |
-
elem_id="
|
258 |
show_label=False,
|
259 |
)
|
260 |
|
261 |
-
|
262 |
with gr.TabItem("Control Panel ποΈ", id=2):
|
263 |
gr.HTML(
|
264 |
"Use this control panel to filter the leaderboard's table and plot.", # noqa: E501
|
@@ -328,7 +345,7 @@ with demo:
|
|
328 |
value="Filter π",
|
329 |
elem_id="filter-button",
|
330 |
)
|
331 |
-
for
|
332 |
filter_button.click(
|
333 |
filter_query,
|
334 |
[
|
@@ -339,9 +356,9 @@ with demo:
|
|
339 |
quantization_checkboxes,
|
340 |
score_slider,
|
341 |
memory_slider,
|
342 |
-
|
343 |
],
|
344 |
-
[
|
345 |
)
|
346 |
|
347 |
####################### ABOUT TAB #######################
|
@@ -356,18 +373,8 @@ with demo:
|
|
356 |
value=CITATION_BUTTON_TEXT,
|
357 |
label=CITATION_BUTTON_LABEL,
|
358 |
elem_id="citation-button",
|
359 |
-
|
360 |
-
|
361 |
-
|
362 |
-
# Restart space every hour
|
363 |
-
scheduler = BackgroundScheduler()
|
364 |
-
scheduler.add_job(
|
365 |
-
restart_space,
|
366 |
-
"interval",
|
367 |
-
seconds=3600,
|
368 |
-
args=[LLM_PERF_LEADERBOARD_REPO, OPTIMUM_TOKEN],
|
369 |
-
)
|
370 |
-
scheduler.start()
|
371 |
|
372 |
# Launch demo
|
373 |
-
demo.
|
|
|
1 |
import os
|
2 |
+
|
3 |
import gradio as gr
|
4 |
import pandas as pd
|
5 |
import plotly.express as px
|
6 |
+
from huggingface_hub.file_download import hf_hub_download
|
7 |
+
|
8 |
|
9 |
+
from src.utils import process_model_name, process_model_arch
|
10 |
from src.assets.css_html_js import custom_css
|
11 |
from src.assets.text_content import (
|
12 |
TITLE,
|
|
|
13 |
ABOUT_TEXT,
|
14 |
+
INTRODUCTION_TEXT,
|
15 |
EXAMPLE_CONFIG_TEXT,
|
16 |
CITATION_BUTTON_LABEL,
|
17 |
CITATION_BUTTON_TEXT,
|
18 |
)
|
|
|
|
|
|
|
|
|
|
|
|
|
19 |
|
20 |
+
HF_TOKEN = os.environ.get("HF_TOKEN", None)
|
|
|
|
|
21 |
LLM_PERF_DATASET_REPO = "optimum/llm-perf-dataset"
|
22 |
+
MACHINE_TO_HARDWARE = {"hf-dgx-01": "A100-80GB π₯οΈ"}
|
|
|
23 |
ALL_COLUMNS_MAPPING = {
|
24 |
+
# model
|
25 |
+
"Model": "Model π€",
|
26 |
+
"Arch": "Arch ποΈ",
|
27 |
+
"Size": "Size ποΈ",
|
28 |
+
# deployment settings
|
29 |
"backend.name": "Backend π",
|
30 |
"backend.torch_dtype": "Dtype π₯",
|
31 |
"optimizations": "Optimizations π οΈ",
|
32 |
"quantization": "Quantization ποΈ",
|
33 |
+
# throughput measurements
|
34 |
+
"decode.throughput(tokens/s)": "Decode Throughput (tokens/s) β¬οΈ",
|
35 |
+
"generate.throughput(tokens/s)": "E2E Throughput (tokens/s) β¬οΈ",
|
36 |
+
# latency measurements
|
37 |
+
"forward.latency(s)": "Prefill Latency (s) β¬οΈ",
|
38 |
+
"generate.latency(s)": "E2E Latency (s) β¬οΈ",
|
39 |
+
# memory measurements
|
40 |
+
"generate.max_memory_allocated(MB)": "Allocated Memory (MB) β¬οΈ",
|
41 |
+
"generate.max_memory_reserved(MB)": "Reserved Memory (MB) β¬οΈ",
|
42 |
+
"generate.max_memory_used(MB)": "Used Memory (MB) β¬οΈ",
|
43 |
+
# energy measurements
|
44 |
"generate.energy_consumption(tokens/kWh)": "Energy (tokens/kWh) β¬οΈ",
|
45 |
+
# quality measurements
|
46 |
+
"Score": "Avg Score (%) β¬οΈ",
|
|
|
47 |
}
|
48 |
+
SORTING_COLUMN = ["Score", "generate.throughput(tokens/s)"]
|
49 |
+
SORTING_ASCENDING = [False, True]
|
50 |
+
|
51 |
ALL_COLUMNS_DATATYPES = [
|
52 |
+
# open llm
|
53 |
+
"markdown",
|
54 |
+
"markdown",
|
55 |
+
"number",
|
56 |
+
# deployment settings
|
57 |
"str",
|
58 |
"str",
|
59 |
"str",
|
60 |
"str",
|
61 |
+
# measurements
|
62 |
+
"number",
|
63 |
+
"number",
|
64 |
+
"number",
|
65 |
+
"number",
|
66 |
+
"number",
|
67 |
+
"number",
|
68 |
+
"number",
|
69 |
"number",
|
70 |
"number",
|
71 |
"number",
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
72 |
]
|
|
|
|
|
73 |
|
|
|
74 |
|
75 |
+
def get_benchmark_df(machine="hf-dgx-01"):
|
76 |
+
# download data
|
77 |
+
hf_hub_download(
|
78 |
+
repo_id="optimum/llm-perf-dataset",
|
79 |
+
filename="open-llm.csv",
|
80 |
+
local_dir="dataset",
|
81 |
+
repo_type="dataset",
|
82 |
+
token=HF_TOKEN,
|
83 |
+
)
|
84 |
+
hf_hub_download(
|
85 |
+
repo_id="optimum/llm-perf-dataset",
|
86 |
+
filename=f"{machine}/full-report.csv",
|
87 |
+
local_dir="dataset",
|
88 |
+
repo_type="dataset",
|
89 |
+
token=HF_TOKEN,
|
90 |
+
)
|
91 |
+
open_llm = pd.read_csv("dataset/open-llm.csv")
|
92 |
+
full_report = pd.read_csv(f"dataset/{machine}/full-report.csv")
|
93 |
|
|
|
|
|
|
|
|
|
|
|
|
|
94 |
# merge on model
|
95 |
+
merged_df = open_llm.merge(full_report, left_on="Model", right_on="model")
|
|
|
|
|
96 |
# transpose energy consumption
|
97 |
merged_df["generate.energy_consumption(tokens/kWh)"] = (
|
98 |
1 / merged_df["generate.energy_consumption(kWh/token)"].fillna(1)
|
|
|
101 |
merged_df.loc[
|
102 |
merged_df["generate.energy_consumption(tokens/kWh)"] == 1,
|
103 |
"generate.energy_consumption(tokens/kWh)",
|
104 |
+
] = pd.NA
|
105 |
+
# add optimizations column
|
106 |
+
merged_df["optimizations"] = merged_df[
|
107 |
+
["backend.to_bettertransformer", "backend.use_flash_attention_2"]
|
108 |
+
].apply(
|
109 |
+
lambda x: "BetterTransformer"
|
110 |
+
if x["backend.to_bettertransformer"]
|
111 |
+
else ("FlashAttentionV2" if x["backend.use_flash_attention_2"] else "None"),
|
112 |
+
axis=1,
|
113 |
)
|
114 |
# add quantization scheme
|
115 |
+
merged_df["quantization"] = merged_df["backend.quantization_scheme"].apply(
|
116 |
lambda x: "BnB.4bit" if x == "bnb" else ("GPTQ.4bit" if x == "gptq" else "None")
|
117 |
)
|
118 |
+
# add decode throughput
|
119 |
+
merged_df["decode.throughput(tokens/s)"] = (
|
120 |
+
1000 / (merged_df["generate.latency(s)"] - merged_df["forward.latency(s)"])
|
121 |
+
).round(2)
|
122 |
+
# sort by metric
|
123 |
merged_df.sort_values(by=SORTING_COLUMN, ascending=SORTING_ASCENDING, inplace=True)
|
124 |
+
# filter columns
|
125 |
+
merged_df = merged_df[list(ALL_COLUMNS_MAPPING.keys())]
|
126 |
+
# rename columns
|
127 |
+
merged_df.rename(columns=ALL_COLUMNS_MAPPING, inplace=True)
|
128 |
+
|
129 |
return merged_df
|
130 |
|
131 |
|
132 |
def get_benchmark_table(bench_df):
|
133 |
copy_df = bench_df.copy()
|
|
|
|
|
|
|
|
|
134 |
# transform
|
135 |
+
copy_df["Model π€"] = copy_df["Model π€"].apply(process_model_name)
|
136 |
+
copy_df["Arch ποΈ"] = copy_df["Arch ποΈ"].apply(process_model_arch)
|
|
|
|
|
137 |
# process quantization
|
138 |
+
copy_df["Avg Score (%) β¬οΈ"] = copy_df.apply(
|
139 |
+
lambda x: f"{x['Avg Score (%) β¬οΈ']}**"
|
140 |
if x["Quantization ποΈ"] in ["BnB.4bit", "GPTQ.4bit"]
|
141 |
+
else x["Avg Score (%) β¬οΈ"],
|
142 |
axis=1,
|
143 |
)
|
144 |
return copy_df
|
|
|
146 |
|
147 |
def get_benchmark_chart(bench_df):
|
148 |
copy_df = bench_df.copy()
|
149 |
+
# transform
|
150 |
+
copy_df["Arch ποΈ"] = copy_df["Arch ποΈ"].apply(process_model_arch)
|
151 |
# filter latency bigger than 100s
|
152 |
+
# copy_df = copy_df[copy_df["E2E Latency (s) β¬οΈ"] <= 100]
|
153 |
+
|
|
|
154 |
fig = px.scatter(
|
155 |
copy_df,
|
156 |
+
y="Avg Score (%) β¬οΈ",
|
157 |
+
x="E2E Latency (s) β¬οΈ",
|
158 |
+
size="Allocated Memory (MB) β¬οΈ",
|
159 |
+
color="Arch ποΈ",
|
160 |
+
custom_data=list(ALL_COLUMNS_MAPPING.values()),
|
161 |
color_discrete_sequence=px.colors.qualitative.Light24,
|
162 |
)
|
163 |
fig.update_layout(
|
|
|
168 |
"xanchor": "center",
|
169 |
"yanchor": "top",
|
170 |
},
|
171 |
+
xaxis_title="Per 1000 Tokens Latency (s)",
|
172 |
+
yaxis_title="Avg Open LLM Score (%)",
|
173 |
+
legend_title="LLM Architecture",
|
174 |
width=1200,
|
175 |
height=600,
|
176 |
)
|
177 |
fig.update_traces(
|
178 |
hovertemplate="<br>".join(
|
179 |
[
|
180 |
+
f"<b>{column}:</b> %{{customdata[{i}]}}"
|
181 |
+
for i, column in enumerate(ALL_COLUMNS_MAPPING.values())
|
182 |
]
|
183 |
)
|
184 |
)
|
|
|
193 |
quantization_scheme,
|
194 |
score,
|
195 |
memory,
|
196 |
+
machine,
|
197 |
):
|
198 |
+
raw_df = get_benchmark_df(machine=machine)
|
199 |
filtered_df = raw_df[
|
200 |
+
raw_df["Model π€"].str.contains(text, case=False)
|
201 |
+
& raw_df["Backend π"].isin(backends)
|
202 |
+
& raw_df["Dtype π₯"].isin(datatypes)
|
203 |
& (
|
204 |
pd.concat(
|
205 |
[
|
206 |
+
raw_df["Optimizations π οΈ"].str.contains(optimization, case=False)
|
207 |
for optimization in optimizations
|
208 |
],
|
209 |
axis=1,
|
|
|
214 |
& (
|
215 |
pd.concat(
|
216 |
[
|
217 |
+
raw_df["Quantization ποΈ"].str.contains(quantization, case=False)
|
218 |
for quantization in quantization_scheme
|
219 |
],
|
220 |
axis=1,
|
|
|
222 |
if len(quantization_scheme) > 0
|
223 |
else True
|
224 |
)
|
225 |
+
& (raw_df["Avg Score (%) β¬οΈ"] >= score)
|
226 |
+
& (raw_df["Allocated Memory (MB) β¬οΈ"] <= memory)
|
227 |
]
|
228 |
filtered_table = get_benchmark_table(filtered_df)
|
229 |
filtered_chart = get_benchmark_chart(filtered_df)
|
|
|
239 |
gr.Markdown(INTRODUCTION_TEXT, elem_classes="descriptive-text")
|
240 |
|
241 |
with gr.Tabs(elem_classes="leaderboard-tabs"):
|
242 |
+
machine_placeholders = {}
|
243 |
+
machine_tables = {}
|
244 |
+
machine_plots = {}
|
245 |
####################### HARDWARE TABS #######################
|
246 |
+
for i, (machine, hardware) in enumerate(MACHINE_TO_HARDWARE.items()):
|
247 |
+
# dummy placeholder of the machine name
|
248 |
+
machine_placeholders[machine] = gr.Textbox(value=machine, visible=False)
|
249 |
+
|
250 |
+
with gr.TabItem(hardware, id=i):
|
251 |
+
with gr.Tabs(elem_classes="machine-tabs"):
|
252 |
# placeholder for full dataframe
|
253 |
+
machine_df = get_benchmark_df(machine=machine)
|
254 |
with gr.TabItem("Leaderboard π
", id=0):
|
255 |
gr.HTML(
|
256 |
"π Scroll to the right π for additional columns.",
|
257 |
elem_id="descriptive-text",
|
258 |
)
|
259 |
# Original leaderboard table
|
260 |
+
machine_tables[machine] = gr.components.Dataframe(
|
261 |
+
value=get_benchmark_table(machine_df),
|
262 |
headers=list(ALL_COLUMNS_MAPPING.values()),
|
263 |
datatype=ALL_COLUMNS_DATATYPES,
|
264 |
+
elem_id="machine-table",
|
|
|
265 |
)
|
266 |
with gr.TabItem("Plot π", id=1):
|
267 |
gr.HTML(
|
|
|
269 |
elem_id="descriptive-text",
|
270 |
)
|
271 |
# Original leaderboard plot
|
272 |
+
machine_plots[machine] = gr.components.Plot(
|
273 |
+
value=get_benchmark_chart(machine_df),
|
274 |
+
elem_id="machine-plot",
|
275 |
show_label=False,
|
276 |
)
|
277 |
|
278 |
+
###################### CONTROL PANEL #######################
|
279 |
with gr.TabItem("Control Panel ποΈ", id=2):
|
280 |
gr.HTML(
|
281 |
"Use this control panel to filter the leaderboard's table and plot.", # noqa: E501
|
|
|
345 |
value="Filter π",
|
346 |
elem_id="filter-button",
|
347 |
)
|
348 |
+
for machine in MACHINE_TO_HARDWARE:
|
349 |
filter_button.click(
|
350 |
filter_query,
|
351 |
[
|
|
|
356 |
quantization_checkboxes,
|
357 |
score_slider,
|
358 |
memory_slider,
|
359 |
+
machine_placeholders[machine],
|
360 |
],
|
361 |
+
[machine_tables[machine], machine_plots[machine]],
|
362 |
)
|
363 |
|
364 |
####################### ABOUT TAB #######################
|
|
|
373 |
value=CITATION_BUTTON_TEXT,
|
374 |
label=CITATION_BUTTON_LABEL,
|
375 |
elem_id="citation-button",
|
376 |
+
show_copy_button=True,
|
377 |
+
)
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
378 |
|
379 |
# Launch demo
|
380 |
+
demo.launch(show_api=False)
|
requirements.txt
CHANGED
@@ -1,5 +1,4 @@
|
|
1 |
-
|
2 |
-
|
3 |
-
|
4 |
-
|
5 |
-
pandas==2.1.0
|
|
|
1 |
+
huggingface_hub
|
2 |
+
gradio
|
3 |
+
plotly
|
4 |
+
pandas
|
|
src/utils.py
CHANGED
@@ -17,26 +17,7 @@ def change_tab(query_param):
|
|
17 |
return gr.Tabs.update(selected=0)
|
18 |
|
19 |
|
20 |
-
|
21 |
-
HfApi().restart_space(repo_id=LLM_PERF_LEADERBOARD_REPO, token=OPTIMUM_TOKEN)
|
22 |
-
|
23 |
-
|
24 |
-
def load_dataset_repo(LLM_PERF_DATASET_REPO, OPTIMUM_TOKEN):
|
25 |
-
llm_perf_dataset_repo = None
|
26 |
-
if OPTIMUM_TOKEN:
|
27 |
-
print("Loading LLM-Perf-Dataset from Hub...")
|
28 |
-
llm_perf_dataset_repo = Repository(
|
29 |
-
local_dir="./llm-perf-dataset",
|
30 |
-
clone_from=LLM_PERF_DATASET_REPO,
|
31 |
-
token=OPTIMUM_TOKEN,
|
32 |
-
repo_type="dataset",
|
33 |
-
)
|
34 |
-
llm_perf_dataset_repo.git_pull()
|
35 |
-
|
36 |
-
return llm_perf_dataset_repo
|
37 |
-
|
38 |
-
|
39 |
-
LLM_MODEL_TYPES = {
|
40 |
# branded ?
|
41 |
"gpt_bigcode": "GPT-BigCode πΈ",
|
42 |
"RefinedWebModel": "Falcon π¦
",
|
@@ -69,8 +50,8 @@ def process_model_name(model_name):
|
|
69 |
return model_hyperlink(link, model_name)
|
70 |
|
71 |
|
72 |
-
def
|
73 |
-
if
|
74 |
-
return
|
75 |
else:
|
76 |
-
return
|
|
|
17 |
return gr.Tabs.update(selected=0)
|
18 |
|
19 |
|
20 |
+
LLM_MODEL_ARCHS = {
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
21 |
# branded ?
|
22 |
"gpt_bigcode": "GPT-BigCode πΈ",
|
23 |
"RefinedWebModel": "Falcon π¦
",
|
|
|
50 |
return model_hyperlink(link, model_name)
|
51 |
|
52 |
|
53 |
+
def process_model_arch(model_arch):
|
54 |
+
if model_arch in LLM_MODEL_ARCHS:
|
55 |
+
return LLM_MODEL_ARCHS[model_arch]
|
56 |
else:
|
57 |
+
return model_arch
|