Jae-Won Chung committed on
Commit
4e4fca8
·
1 Parent(s): 2eb5843

Detail and no-detail mode

Browse files
app.py CHANGED
@@ -6,7 +6,6 @@ where UI elements are actually defined.
6
 
7
  from __future__ import annotations
8
 
9
- from abc import abstractmethod
10
  import copy
11
  import json
12
  import random
@@ -17,6 +16,7 @@ import contextlib
17
  import argparse
18
  import os
19
  from pathlib import Path
 
20
  from typing import Literal, Any
21
  from dateutil import parser, tz
22
 
@@ -61,12 +61,12 @@ class TableManager:
61
  """Return the name of the leaderboard."""
62
 
63
  @abstractmethod
64
- def get_intro_text(self) -> tuple[str, str]:
65
- """Return the type of the introduction text and the introduction text."""
66
 
67
  @abstractmethod
68
- def get_detail_text(self) -> tuple[str, str]:
69
- """Return the type of the detail text and the detail text."""
70
 
71
  def get_benchmark_checkboxes(self) -> dict[str, list[str]]:
72
  """Return data for the benchmark selection checkboxes."""
@@ -84,7 +84,7 @@ class TableManager:
84
  """Return all available models."""
85
 
86
  @abstractmethod
87
- def set_filter_get_df(self, *filters) -> pd.DataFrame:
88
  """Set the current set of filters and return the filtered DataFrame."""
89
 
90
 
@@ -127,7 +127,7 @@ class LLMTableManager(TableManager):
127
  model_df[key] = val
128
  # Format the model name as an HTML anchor.
129
  model_df["Model"] = self._wrap_model_name(model_info["url"], model_info["nickname"])
130
- model_df["Params"] = model_info["params"]
131
  res_df = pd.concat([res_df, model_df])
132
 
133
  if res_df.empty:
@@ -137,7 +137,7 @@ class LLMTableManager(TableManager):
137
 
138
  # Order columns
139
  columns = res_df.columns.to_list()
140
- cols_to_order = ["Model", "Params"]
141
  cols_to_order.extend(self.schema.keys())
142
  columns = cols_to_order + [col for col in columns if col not in cols_to_order]
143
  res_df = res_df[columns]
@@ -145,21 +145,21 @@ class LLMTableManager(TableManager):
145
  # Order rows
146
  res_df = res_df.sort_values(by=["Model", *self.schema.keys(), "Energy/req (J)"])
147
 
148
- self.cur_df = self.full_df = res_df.round(2)
149
 
150
  # We need to set the default view separately when `gr.State` is forked.
151
- self.set_filter_get_df()
152
 
153
  def get_benchmark_checkboxes(self) -> dict[str, list[str]]:
154
  return self.schema
155
 
156
  def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
157
- return {"Target Time Per Output Token (TPOT) (s)": (0.0, 0.5, 0.01, 0.2)}
158
 
159
  def get_all_models(self) -> list[str]:
160
  return self.full_df["Model"].apply(self._unwrap_model_name).unique().tolist()
161
 
162
- def set_filter_get_df(self, *filters) -> pd.DataFrame:
163
  """Set the current set of filters and return the filtered DataFrame.
164
 
165
  Filters can either be completely empty, or be a concatenated list of
@@ -175,15 +175,15 @@ class LLMTableManager(TableManager):
175
  # Checkboxes
176
  for setup, choice in zip(self.schema, filters):
177
  index = index & self.full_df[setup].isin(choice)
178
- self.cur_df = self.full_df.loc[index]
179
 
180
  # Sliders (We just have TPOT for now.)
181
  # For each `Model`, we want to first filter out rows whose `Avg TPOT (s)` is greater than the slider value.
182
  # Finally, only just leave the row whose `Energy/req (J)` is the smallest.
183
  tpot_slo = filters[-1]
184
- self.cur_df = (
185
- self.cur_df
186
- .groupby("Model")[self.cur_df.columns]
187
  .apply(lambda x: x[x["Avg TPOT (s)"] <= tpot_slo], include_groups=True)
188
  .sort_values(by="Energy/req (J)")
189
  .reset_index(drop=True)
@@ -191,26 +191,16 @@ class LLMTableManager(TableManager):
191
  .head(1)
192
  )
193
 
194
- return self.cur_df
 
 
 
 
 
 
 
195
 
196
- def get_detail_text(self) -> tuple[str, str]:
197
- text = """
198
- Columns
199
- - **Model**: The name of the model.
200
- - **GPU**: Name of the GPU model used for benchmarking.
201
- - **Params**: Number of parameters in the model.
202
- - **TP**: Tensor parallelism degree.
203
- - **PP**: Pipeline parallelism degree. (TP * PP is the total number of GPUs used.)
204
- - **Energy/req (J)**: Energy consumed per request in Joules.
205
- - **Avg TPOT (s)**: Average time per output token in seconds.
206
- - **Token tput (toks/s)**: Average number of tokens generated by the engine per second.
207
- - **Avg Output Tokens**: Average number of output tokens in the LLM's response.
208
- - **Avg BS**: Average batch size of the serving engine over time.
209
- - **Max BS**: Maximum batch size configuration of the serving engine.
210
-
211
- For more detailed information, please take a look at the **About** tab.
212
- """
213
- return "markdown", text
214
 
215
 
216
  class LLMChatTableManager(LLMTableManager):
@@ -219,21 +209,59 @@ class LLMChatTableManager(LLMTableManager):
219
  def get_tab_name(self) -> str:
220
  return "LLM Chat"
221
 
222
- def get_intro_text(self) -> tuple[str, str]:
223
  text = """
224
  <h2>How much energy do GenAI models consume?</h2>
225
 
226
  <h3>LLM chatbot response generation</h3>
227
 
228
  <p style="font-size: 16px">
229
- We used <a href="https://ml.energy/zeus">Zeus</a> to benchmark various instruction-tuned LLMs in terms of how much time and energy they consume for inference.
 
230
  </p>
231
 
232
  <p style="font-size: 16px">
233
- An average Time Per Output Token (TPOT) of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and 1.3 tokens per word.
234
  </p>
235
  """
236
- return "html", text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
237
 
238
 
239
  class LLMCodeTableManager(LLMTableManager):
@@ -242,21 +270,58 @@ class LLMCodeTableManager(LLMTableManager):
242
  def get_tab_name(self) -> str:
243
  return "LLM Code"
244
 
245
- def get_intro_text(self) -> tuple[str, str]:
246
  text = """
247
  <h2>How much energy do GenAI models consume?</h2>
248
 
249
  <h3>LLM code generation</h3>
250
 
251
  <p style="font-size: 16px">
252
- We used <a href="https://ml.energy/zeus">Zeus</a> to benchmark various LLMs specialized for coding in terms of how much time and energy they consume for inference.
 
253
  </p>
254
 
255
  <p style="font-size: 16px">
256
- An average Time Per Output Token (TPOT) of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and 1.3 tokens per word.
257
  </p>
258
  """
259
- return "html", text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
260
 
261
 
262
  class VLMChatTableManager(LLMTableManager):
@@ -265,21 +330,58 @@ class VLMChatTableManager(LLMTableManager):
265
  def get_tab_name(self) -> str:
266
  return "VLM Visual Chat"
267
 
268
- def get_intro_text(self) -> tuple[str, str]:
269
  text = """
270
  <h2>How much energy do GenAI models consume?</h2>
271
 
272
  <h3>VLM visual chatbot response generation</h3>
273
 
274
  <p style="font-size: 16px">
275
- We used <a href="https://ml.energy/zeus">Zeus</a> to benchmark various Vision Language Models (VLMs) in terms of how much time and energy they consume for inference.
 
276
  </p>
277
 
278
  <p style="font-size: 16px">
279
- A Time Per Output Token (TPOT) of 0.2 seconds roughly corresponds to a person reading at 240 words per minute and 1.3 tokens per word.
280
  </p>
281
  """
282
- return "html", text
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
283
 
284
 
285
  class DiffusionTableManager(TableManager):
@@ -301,8 +403,10 @@ class DiffusionTableManager(TableManager):
301
 
302
  if "to video" in task_name.lower():
303
  self.energy_col = "Energy/video (J)"
 
304
  elif "to image" in task_name.lower():
305
  self.energy_col = "Energy/image (J)"
 
306
  else:
307
  raise ValueError(f"Unknown task name: {task_name=}")
308
 
@@ -348,10 +452,10 @@ class DiffusionTableManager(TableManager):
348
  # Order rows
349
  res_df = res_df.sort_values(by=["Model", *self.schema.keys(), self.energy_col])
350
 
351
- self.cur_df = self.full_df = res_df.round(2)
352
 
353
  # We need to set the default view separately when `gr.State` is forked.
354
- self.set_filter_get_df()
355
 
356
  def get_benchmark_checkboxes(self) -> dict[str, list[str]]:
357
  return self.schema
@@ -359,7 +463,7 @@ class DiffusionTableManager(TableManager):
359
  def get_all_models(self) -> list[str]:
360
  return self.full_df["Model"].apply(self._unwrap_model_name).unique().tolist()
361
 
362
- def set_filter_get_df(self, *filters) -> pd.DataFrame:
363
  """Set the current set of filters and return the filtered DataFrame.
364
 
365
  Filters can either be completely empty, or be a concatenated list of
@@ -375,15 +479,15 @@ class DiffusionTableManager(TableManager):
375
  # Checkboxes
376
  for setup, choice in zip(self.schema, filters):
377
  index = index & self.full_df[setup].isin(choice)
378
- self.cur_df = self.full_df.loc[index]
379
 
380
  # Sliders (We just have Batch latency for now.)
381
  # For each `Model`, we want to first filter out rows whose `Batch latency (s)` is greater than the slider value.
382
  # Finally, only just leave the row whose `Energy/image (J)` or `Energy/video (J)` is the smallest.
383
  batch_latency = filters[-1]
384
- self.cur_df = (
385
- self.cur_df
386
- .groupby("Model")[self.cur_df.columns]
387
  .apply(
388
  lambda x: x[x["Batch latency (s)"] <= batch_latency],
389
  include_groups=True,
@@ -394,7 +498,19 @@ class DiffusionTableManager(TableManager):
394
  .head(1)
395
  )
396
 
397
- return self.cur_df
 
 
 
 
 
 
 
 
 
 
 
 
398
 
399
 
400
  class DiffusionT2ITableManager(DiffusionTableManager):
@@ -403,36 +519,49 @@ class DiffusionT2ITableManager(DiffusionTableManager):
403
  def get_tab_name(self) -> str:
404
  return "Diffusion Text to image"
405
 
406
- def get_intro_text(self) -> tuple[str, str]:
407
  text = """
408
  <h2>Diffusion text-to-image generation</h2></br>
409
 
410
  <p style="font-size: 16px">
411
- We used <a href="https://ml.energy/zeus">Zeus</a> to benchmark various open source LLMs in terms of how much time and energy they consume for inference.
 
412
  </p>
413
 
414
  <p style="font-size: 16px">
415
- The time and energy consumption of Diffusion models are affected by not only the size of the model, but also the number of denoising steps and the resolution of the generated images.
416
  </p>
417
  """
418
- return "html", text
419
-
420
- def get_detail_text(self) -> tuple[str, str]:
421
- text = """
422
- Columns
423
- - **Model**: The name of the model.
424
- - **Denoising params**: Number of parameters in the denosing module (e.g., UNet, Transformer).
425
- - **Total params**: Total number of parameters in the model, including encoders and decoders.
426
- - **GPU**: Name of the GPU model used for benchmarking.
427
- - **Energy/image (J)**: Energy consumed per generated image in Joules.
428
- - **Batch latency (s)**: Time taken to generate a batch of images in seconds.
429
- - **Batch size**: Number of prompts/images in a batch.
430
- - **Denoising steps**: Number of denoising steps used for the diffusion model.
431
- - **Resolution**: Resolution of the generated image.
432
-
433
- For more detailed information, please take a look at the **About** tab.
434
- """
435
- return "markdown", text
 
 
 
 
 
 
 
 
 
 
 
 
436
 
437
  def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
438
  return {"Batch latency (s)": (0.0, 60.0, 1.0, 10.0)}
@@ -444,37 +573,50 @@ class DiffusionT2VTableManager(DiffusionTableManager):
444
  def get_tab_name(self) -> str:
445
  return "Diffusion Text to video"
446
 
447
- def get_intro_text(self) -> tuple[str, str]:
448
  text = """
449
  <h2>Diffusion text-to-video generation</h2></br>
450
 
451
  <p style="font-size: 16px">
452
- We used <a href="https://ml.energy/zeus">Zeus</a> to benchmark various open source LLMs in terms of how much time and energy they consume for inference.
 
453
  </p>
454
 
455
  <p style="font-size: 16px">
456
- The time and energy consumption of Diffusion models are affected by not only the size of the model, but also the number of denoising steps, the resolution of the generated video, and the total number of frames in the video.
457
  </p>
458
  """
459
- return "html", text
460
-
461
- def get_detail_text(self) -> tuple[str, str]:
462
- text = """
463
- Columns
464
- - **Model**: The name of the model.
465
- - **Denoising params**: Number of parameters in the denosing module (e.g., UNet, Transformer).
466
- - **Total params**: Total number of parameters in the model, including encoders and decoders.
467
- - **GPU**: Name of the GPU model used for benchmarking.
468
- - **Energy/video (J)**: Energy consumed per generated video in Joules.
469
- - **Batch latency (s)**: Time taken to generate a batch of videos in seconds.
470
- - **Batch size**: Number of prompts/videos in a batch.
471
- - **Denoising steps**: Number of denoising steps used for the diffusion model.
472
- - **Frames**: Number of frames in the generated video.
473
- - **Resolution**: Resolution of the generated video.
474
-
475
- For more detailed information, please take a look at the **About** tab.
476
- """
477
- return "markdown", text
 
 
 
 
 
 
 
 
 
 
 
 
478
 
479
  def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
480
  return {"Batch latency (s)": (0.0, 60.0, 1.0, 10.0)}
@@ -486,37 +628,50 @@ class DiffusionI2VTableManager(DiffusionTableManager):
486
  def get_tab_name(self) -> str:
487
  return "Diffusion Image to video"
488
 
489
- def get_intro_text(self) -> tuple[str, str]:
490
  text = """
491
  <h2>Diffusion image-to-video generation</h2></br>
492
 
493
  <p style="font-size: 16px">
494
- We used <a href="https://ml.energy/zeus">Zeus</a> to benchmark various open source LLMs in terms of how much time and energy they consume for inference.
 
495
  </p>
496
 
497
  <p style="font-size: 16px">
498
- The time and energy consumption of Diffusion models are affected by not only the size of the model, but also the number of denoising steps, the resolution of the generated video, and the total number of frames in the video.
499
  </p>
500
  """
501
- return "html", text
502
-
503
- def get_detail_text(self) -> tuple[str, str]:
504
- text = """
505
- Columns
506
- - **Model**: The name of the model.
507
- - **Denoising params**: Number of parameters in the denosing module (e.g., UNet, Transformer).
508
- - **Total params**: Total number of parameters in the model, including encoders and decoders.
509
- - **GPU**: Name of the GPU model used for benchmarking.
510
- - **Energy/video (J)**: Energy consumed per generated video in Joules.
511
- - **Batch latency (s)**: Time taken to generate a batch of videos in seconds.
512
- - **Batch size**: Number of prompts/videos in a batch.
513
- - **Denoising steps**: Number of denoising steps used for the diffusion model.
514
- - **Frames**: Number of frames in the generated video.
515
- - **Resolution**: Resolution of the generated video.
516
-
517
- For more detailed information, please take a look at the **About** tab.
518
- """
519
- return "markdown", text
 
 
 
 
 
 
 
 
 
 
 
 
520
 
521
  def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
522
  return {"Batch latency (s)": (0.0, 120.0, 1.0, 45.0)}
@@ -563,7 +718,7 @@ class LegacyTableManager:
563
  self.full_df = df
564
 
565
  # Default view of the table is to only show the first options.
566
- self.set_filter_get_df()
567
 
568
  def _read_tables(self, data_dir: str) -> pd.DataFrame:
569
  """Read tables."""
@@ -622,7 +777,7 @@ class LegacyTableManager:
622
  gr.Dropdown.update(choices=["None", *columns]),
623
  ]
624
 
625
- def set_filter_get_df(self, *filters) -> pd.DataFrame:
626
  """Set the current set of filters and return the filtered DataFrame."""
627
  # If the filter is empty, we default to the first choice for each key.
628
  if not filters:
@@ -639,7 +794,7 @@ class LegacyTableManager:
639
  """Return the leaderboard's introduction text in HTML."""
640
  return """
641
  <div align="center">
642
- <h2 style="color: #23d175">This is the legacy ML.ENERGY LLM leaderboard. This will be removed by the end of the year.</h2>
643
  </div>
644
 
645
  <h3>How much energy do modern Large Language Models (LLMs) consume for inference?</h3>
@@ -795,6 +950,12 @@ table th:first-child {
795
  #citation-header > div > span {
796
  font-size: 16px !important;
797
  }
 
 
 
 
 
 
798
  """
799
 
800
  # The app will not start without a controller address set.
@@ -866,8 +1027,8 @@ def consumed_more_energy_message(energy_a, energy_b):
866
  # Colosseum event handlers
867
  def on_load():
868
  """Initialize the dataframe, shuffle the model preference dropdown choices."""
869
- dataframe = global_ltbm.set_filter_get_df()
870
- dataframes = [global_tbm.set_filter_get_df() for global_tbm in global_tbms]
871
  return dataframe, *dataframes
872
 
873
 
@@ -980,6 +1141,14 @@ def play_again():
980
  ]
981
 
982
 
 
 
 
 
 
 
 
 
983
  focus_prompt_input_js = """
984
  function() {
985
  for (let textarea of document.getElementsByTagName("textarea")) {
@@ -994,6 +1163,7 @@ function() {
994
  with gr.Blocks(css=custom_css) as block:
995
  tbm = gr.State(global_ltbm) # type: ignore
996
  local_tbms: list[TableManager] = [gr.State(global_tbm) for global_tbm in global_tbms] # type: ignore
 
997
 
998
  with gr.Box():
999
  gr.HTML(
@@ -1144,19 +1314,16 @@ with gr.Blocks(css=custom_css) as block:
1144
 
1145
  # Tab: Leaderboards.
1146
  dataframes = []
 
 
 
1147
  for global_tbm, local_tbm in zip(global_tbms, local_tbms):
1148
  with gr.Tab(global_tbm.get_tab_name()):
1149
  # Box: Introduction text.
1150
  with gr.Box():
1151
- intro_text_type, intro_text = global_tbm.get_intro_text()
1152
- if intro_text_type not in ["markdown", "html"]:
1153
- raise ValueError(f"Invalid text type '{intro_text_type}' from {local_tbm}")
1154
- if intro_text_type == "markdown":
1155
- gr.Markdown(intro_text)
1156
- else:
1157
- gr.HTML(intro_text)
1158
-
1159
- # Block: Checkboxes and sliders to select benchmarking parameters.
1160
  with gr.Row():
1161
  checkboxes: list[gr.CheckboxGroup] = []
1162
  for key, choices in global_tbm.get_benchmark_checkboxes().items():
@@ -1165,7 +1332,12 @@ with gr.Blocks(css=custom_css) as block:
1165
 
1166
  sliders: list[gr.Slider] = []
1167
  for key, (min_val, max_val, step, default) in global_tbm.get_benchmark_sliders().items():
1168
- sliders.append(gr.Slider(minimum=min_val, maximum=max_val, value=default, step=step, label=key))
 
 
 
 
 
1169
 
1170
  # Block: Leaderboard table.
1171
  with gr.Row():
@@ -1173,6 +1345,7 @@ with gr.Blocks(css=custom_css) as block:
1173
  type="pandas",
1174
  elem_classes=["tab-leaderboard"],
1175
  interactive=False,
 
1176
  )
1177
  dataframes.append(dataframe)
1178
 
@@ -1181,23 +1354,18 @@ with gr.Blocks(css=custom_css) as block:
1181
  None, None, None, _js=dataframe_update_js, queue=False
1182
  )
1183
  # Table automatically updates when users check or uncheck any checkbox or move any slider.
1184
- for element in [*checkboxes, *sliders]:
1185
  element.change(
1186
  global_tbm.__class__.set_filter_get_df,
1187
- inputs=[local_tbm, *checkboxes, *sliders],
1188
  outputs=dataframe,
1189
  queue=False,
1190
  )
1191
 
1192
  # Block: More details about the leaderboard.
1193
  with gr.Box():
1194
- detail_text_type, detail_text = global_tbm.get_detail_text()
1195
- if detail_text_type not in ["markdown", "html"]:
1196
- raise ValueError(f"Invalid text type '{detail_text_type}' from {local_tbm}")
1197
- if detail_text_type == "markdown":
1198
- gr.Markdown(detail_text)
1199
- else:
1200
- gr.HTML(detail_text)
1201
 
1202
  # Block: Leaderboard date.
1203
  with gr.Row():
@@ -1208,7 +1376,7 @@ with gr.Blocks(css=custom_css) as block:
1208
  # Tab: Legacy leaderboard.
1209
  with gr.Tab("LLM Leaderboard (legacy)"):
1210
  with gr.Box():
1211
- gr.HTML(global_ltbm.get_intro_text())
1212
 
1213
  # Block: Checkboxes to select benchmarking parameters.
1214
  with gr.Row():
@@ -1247,6 +1415,21 @@ with gr.Blocks(css=custom_css) as block:
1247
  with gr.Tab("About"):
1248
  gr.Markdown(open("docs/about.md").read())
1249
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1250
  # Citation
1251
  with gr.Accordion("📚 Citation", open=False, elem_id="citation-header"):
1252
  citation_text = open("docs/citation.bib").read()
 
6
 
7
  from __future__ import annotations
8
 
 
9
  import copy
10
  import json
11
  import random
 
16
  import argparse
17
  import os
18
  from pathlib import Path
19
+ from abc import abstractmethod
20
  from typing import Literal, Any
21
  from dateutil import parser, tz
22
 
 
61
  """Return the name of the leaderboard."""
62
 
63
  @abstractmethod
64
+ def get_intro_text(self) -> str:
65
+ """Return the introduction text to be inserted above the table."""
66
 
67
  @abstractmethod
68
+ def get_detail_text(self, detail_mode: bool) -> str:
69
+ """Return the detail text chunk to be inserted below the table."""
70
 
71
  def get_benchmark_checkboxes(self) -> dict[str, list[str]]:
72
  """Return data for the benchmark selection checkboxes."""
 
84
  """Return all available models."""
85
 
86
  @abstractmethod
87
+ def set_filter_get_df(self, detail_mode: bool, *filters) -> pd.DataFrame:
88
  """Set the current set of filters and return the filtered DataFrame."""
89
 
90
 
 
127
  model_df[key] = val
128
  # Format the model name as an HTML anchor.
129
  model_df["Model"] = self._wrap_model_name(model_info["url"], model_info["nickname"])
130
+ model_df["Params (B)"] = model_info["params"]
131
  res_df = pd.concat([res_df, model_df])
132
 
133
  if res_df.empty:
 
137
 
138
  # Order columns
139
  columns = res_df.columns.to_list()
140
+ cols_to_order = ["Model", "Params (B)"]
141
  cols_to_order.extend(self.schema.keys())
142
  columns = cols_to_order + [col for col in columns if col not in cols_to_order]
143
  res_df = res_df[columns]
 
145
  # Order rows
146
  res_df = res_df.sort_values(by=["Model", *self.schema.keys(), "Energy/req (J)"])
147
 
148
+ self.full_df = res_df.round(2)
149
 
150
  # We need to set the default view separately when `gr.State` is forked.
151
+ self.set_filter_get_df(detail_mode=False)
152
 
153
  def get_benchmark_checkboxes(self) -> dict[str, list[str]]:
154
  return self.schema
155
 
156
  def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
157
+ return {"Target Average TPOT (Time Per Output Token) (s)": (0.0, 0.5, 0.01, 0.2)}
158
 
159
  def get_all_models(self) -> list[str]:
160
  return self.full_df["Model"].apply(self._unwrap_model_name).unique().tolist()
161
 
162
+ def set_filter_get_df(self, detail_mode: bool, *filters) -> pd.DataFrame:
163
  """Set the current set of filters and return the filtered DataFrame.
164
 
165
  Filters can either be completely empty, or be a concatenated list of
 
175
  # Checkboxes
176
  for setup, choice in zip(self.schema, filters):
177
  index = index & self.full_df[setup].isin(choice)
178
+ cur_df = self.full_df.loc[index]
179
 
180
  # Sliders (We just have TPOT for now.)
181
  # For each `Model`, we want to first filter out rows whose `Avg TPOT (s)` is greater than the slider value.
182
  # Finally, only just leave the row whose `Energy/req (J)` is the smallest.
183
  tpot_slo = filters[-1]
184
+ cur_df = (
185
+ cur_df
186
+ .groupby("Model")[cur_df.columns]
187
  .apply(lambda x: x[x["Avg TPOT (s)"] <= tpot_slo], include_groups=True)
188
  .sort_values(by="Energy/req (J)")
189
  .reset_index(drop=True)
 
191
  .head(1)
192
  )
193
 
194
+ if not detail_mode:
195
+ core_columns = ["Model", "Params (B)", "GPU", "Energy/req (J)"]
196
+ readable_name_mapping = {
197
+ "Params (B)": "Parameters (Billions)",
198
+ "GPU": "GPU model",
199
+ "Energy/req (J)": "Energy per response (Joules)",
200
+ }
201
+ cur_df = cur_df[core_columns].rename(columns=readable_name_mapping)
202
 
203
+ return cur_df
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
204
 
205
 
206
  class LLMChatTableManager(LLMTableManager):
 
209
  def get_tab_name(self) -> str:
210
  return "LLM Chat"
211
 
212
+ def get_intro_text(self) -> str:
213
  text = """
214
  <h2>How much energy do GenAI models consume?</h2>
215
 
216
  <h3>LLM chatbot response generation</h3>
217
 
218
  <p style="font-size: 16px">
219
+ Large language models (LLMs), especially the instruction-tuned ones, can generate human-like responses to chat prompts.
220
+ Using <a href="https://ml.energy/zeus">Zeus</a> for energy measurement, we created a leaderboard for LLM chat energy consumption.
221
  </p>
222
 
223
  <p style="font-size: 16px">
224
+ More models will be added over time. Stay tuned!
225
  </p>
226
  """
227
+ return text
228
+
229
+ def get_detail_text(self, detail_mode: bool) -> str:
230
+ if detail_mode:
231
+ text = """
232
+ Columns
233
+ - **Model**: The name of the model.
234
+ - **Params (B)**: Number of parameters in the model.
235
+ - **GPU**: Name of the GPU model used for benchmarking.
236
+ - **TP**: Tensor parallelism degree.
237
+ - **PP**: Pipeline parallelism degree. (TP * PP is the total number of GPUs used.)
238
+ - **Energy/req (J)**: Energy consumed per request in Joules.
239
+ - **Avg TPOT (s)**: Average time per output token in seconds.
240
+ - **Token tput (toks/s)**: Average number of tokens generated by the engine per second.
241
+ - **Avg Output Tokens**: Average number of output tokens in the LLM's response.
242
+ - **Avg BS**: Average batch size of the serving engine over time.
243
+ - **Max BS**: Maximum batch size configuration of the serving engine.
244
+
245
+ **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
246
+ An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
247
+ You can tweak the TPOT slider to adjust the target average TPOT for the models.
248
+
249
+ For more detailed information, please take a look at the **About** tab.
250
+ """
251
+ else:
252
+ text = """
253
+ Columns
254
+ - **Model**: The name of the model.
255
+ - **Parameters (Billions)**: Number of parameters in the model. This is the size of the model.
256
+ - **GPU model**: Name of the GPU model used for benchmarking.
257
+ - **Energy per response (Joules)**: Energy consumed for each LLM response in Joules.
258
+
259
+ Checking "Show more technical details" above the table will reveal more detailed columns.
260
+ Also, for more detailed information, please take a look at the **About** tab.
261
+ """
262
+
263
+ return text
264
+
265
 
266
 
267
  class LLMCodeTableManager(LLMTableManager):
 
270
  def get_tab_name(self) -> str:
271
  return "LLM Code"
272
 
273
+ def get_intro_text(self) -> str:
274
  text = """
275
  <h2>How much energy do GenAI models consume?</h2>
276
 
277
  <h3>LLM code generation</h3>
278
 
279
  <p style="font-size: 16px">
280
+ Large language models (LLMs) are also capable of generating code.
281
+ Using <a href="https://ml.energy/zeus">Zeus</a> for energy measurement, we created a leaderboard for the energy consumption of LLMs specifically trained for code generation.
282
  </p>
283
 
284
  <p style="font-size: 16px">
285
+ More models will be added over time. Stay tuned!
286
  </p>
287
  """
288
+ return text
289
+
290
+ def get_detail_text(self, detail_mode: bool) -> str:
291
+ if detail_mode:
292
+ text = """
293
+ Columns
294
+ - **Model**: The name of the model.
295
+ - **Params (B)**: Number of parameters in the model.
296
+ - **GPU**: Name of the GPU model used for benchmarking.
297
+ - **TP**: Tensor parallelism degree.
298
+ - **PP**: Pipeline parallelism degree. (TP * PP is the total number of GPUs used.)
299
+ - **Energy/req (J)**: Energy consumed per request in Joules.
300
+ - **Avg TPOT (s)**: Average time per output token in seconds.
301
+ - **Token tput (toks/s)**: Average number of tokens generated by the engine per second.
302
+ - **Avg Output Tokens**: Average number of output tokens in the LLM's response.
303
+ - **Avg BS**: Average batch size of the serving engine over time.
304
+ - **Max BS**: Maximum batch size configuration of the serving engine.
305
+
306
+ **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
307
+ An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
308
+ You can tweak the TPOT slider to adjust the target average TPOT for the models.
309
+
310
+ For more detailed information, please take a look at the **About** tab.
311
+ """
312
+ else:
313
+ text = """
314
+ Columns
315
+ - **Model**: The name of the model.
316
+ - **Parameters (Billions)**: Number of parameters in the model. This is the size of the model.
317
+ - **GPU model**: Name of the GPU model used for benchmarking.
318
+ - **Energy per response (Joules)**: Energy consumed for each LLM response in Joules.
319
+
320
+ Checking "Show more technical details" above the table will reveal more detailed columns.
321
+ Also, for more detailed information, please take a look at the **About** tab.
322
+ """
323
+
324
+ return text
325
 
326
 
327
  class VLMChatTableManager(LLMTableManager):
 
330
  def get_tab_name(self) -> str:
331
  return "VLM Visual Chat"
332
 
333
+ def get_intro_text(self) -> str:
334
  text = """
335
  <h2>How much energy do GenAI models consume?</h2>
336
 
337
  <h3>VLM visual chatbot response generation</h3>
338
 
339
  <p style="font-size: 16px">
340
+ Vision language models (VLMs) are large language models that can understand images along with text and generate human-like responses to chat prompts with images.
341
+ Using <a href="https://ml.energy/zeus">Zeus</a> for energy measurement, we created a leaderboard for VLM chat energy consumption.
342
  </p>
343
 
344
  <p style="font-size: 16px">
345
+ More models will be added over time. Stay tuned!
346
  </p>
347
  """
348
+ return text
349
+
350
+ def get_detail_text(self, detail_mode: bool) -> str:
351
+ if detail_mode:
352
+ text = """
353
+ Columns
354
+ - **Model**: The name of the model.
355
+ - **Params (B)**: Number of parameters in the model.
356
+ - **GPU**: Name of the GPU model used for benchmarking.
357
+ - **TP**: Tensor parallelism degree.
358
+ - **PP**: Pipeline parallelism degree. (TP * PP is the total number of GPUs used.)
359
+ - **Energy/req (J)**: Energy consumed per request in Joules.
360
+ - **Avg TPOT (s)**: Average time per output token in seconds.
361
+ - **Token tput (toks/s)**: Average number of tokens generated by the engine per second.
362
+ - **Avg Output Tokens**: Average number of output tokens in the LLM's response.
363
+ - **Avg BS**: Average batch size of the serving engine over time.
364
+ - **Max BS**: Maximum batch size configuration of the serving engine.
365
+
366
+ **TPOT (Time Per Output Token)** is the time between each token generated by LLMs as part of their response.
367
+ An average TPOT of 0.20 seconds roughly corresponds to a person reading at 240 words per minute and assuming one word is 1.3 tokens on average.
368
+ You can tweak the TPOT slider to adjust the target average TPOT for the models.
369
+
370
+ For more detailed information, please take a look at the **About** tab.
371
+ """
372
+ else:
373
+ text = """
374
+ Columns
375
+ - **Model**: The name of the model.
376
+ - **Parameters (Billions)**: Number of parameters in the model. This is the size of the model.
377
+ - **GPU model**: Name of the GPU model used for benchmarking.
378
+ - **Energy per response (Joules)**: Energy consumed for each LLM response in Joules.
379
+
380
+ Checking "Show more technical details" above the table will reveal more detailed columns.
381
+ Also, for more detailed information, please take a look at the **About** tab.
382
+ """
383
+
384
+ return text
385
 
386
 
387
  class DiffusionTableManager(TableManager):
 
403
 
404
  if "to video" in task_name.lower():
405
  self.energy_col = "Energy/video (J)"
406
+ self.energy_col_readable = "Energy per video (Joules)"
407
  elif "to image" in task_name.lower():
408
  self.energy_col = "Energy/image (J)"
409
+ self.energy_col_readable = "Energy per image (Joules)"
410
  else:
411
  raise ValueError(f"Unknown task name: {task_name=}")
412
 
 
452
  # Order rows
453
  res_df = res_df.sort_values(by=["Model", *self.schema.keys(), self.energy_col])
454
 
455
+ self.full_df = res_df.round(2)
456
 
457
  # We need to set the default view separately when `gr.State` is forked.
458
+ self.set_filter_get_df(detail_mode=False)
459
 
460
  def get_benchmark_checkboxes(self) -> dict[str, list[str]]:
461
  return self.schema
 
463
  def get_all_models(self) -> list[str]:
464
  return self.full_df["Model"].apply(self._unwrap_model_name).unique().tolist()
465
 
466
+ def set_filter_get_df(self, detail_mode: bool, *filters) -> pd.DataFrame:
467
  """Set the current set of filters and return the filtered DataFrame.
468
 
469
  Filters can either be completely empty, or be a concatenated list of
 
479
  # Checkboxes
480
  for setup, choice in zip(self.schema, filters):
481
  index = index & self.full_df[setup].isin(choice)
482
+ cur_df = self.full_df.loc[index]
483
 
484
  # Sliders (We just have Batch latency for now.)
485
  # For each `Model`, we want to first filter out rows whose `Batch latency (s)` is greater than the slider value.
486
  # Finally, leave only the row whose `Energy/image (J)` or `Energy/video (J)` is the smallest.
487
  batch_latency = filters[-1]
488
+ cur_df = (
489
+ cur_df
490
+ .groupby("Model")[cur_df.columns]
491
  .apply(
492
  lambda x: x[x["Batch latency (s)"] <= batch_latency],
493
  include_groups=True,
 
498
  .head(1)
499
  )
500
 
501
+ if not detail_mode:
502
+ core_columns = ["Model", "Denoising params", "GPU", "Denoising steps", "Resolution", "Frames", self.energy_col]
503
+ readable_name_mapping = {
504
+ "Denoising params": "Denoising parameters (Billions)",
505
+ "GPU": "GPU model",
506
+ self.energy_col: self.energy_col_readable,
507
+ }
508
+ for column in cur_df.columns:
509
+ if column not in core_columns:
510
+ cur_df = cur_df.drop(column, axis=1)
511
+ cur_df = cur_df.rename(columns=readable_name_mapping)
512
+
513
+ return cur_df
514
 
515
 
516
  class DiffusionT2ITableManager(DiffusionTableManager):
 
519
  def get_tab_name(self) -> str:
520
  return "Diffusion Text to image"
521
 
522
+ def get_intro_text(self) -> str:
523
  text = """
524
  <h2>Diffusion text-to-image generation</h2></br>
525
 
526
  <p style="font-size: 16px">
527
+ Diffusion models generate images that align with input text prompts.
528
+ Using <a href="https://ml.energy/zeus">Zeus</a> for energy measurement, we created a leaderboard for the energy consumption of Diffusion text-to-image.
529
  </p>
530
 
531
  <p style="font-size: 16px">
532
+ More models will be added over time. Stay tuned!
533
  </p>
534
  """
535
+ return text
536
+
537
+ def get_detail_text(self, detail_mode: bool) -> str:
538
+ if detail_mode:
539
+ text = """
540
+ Columns
541
+ - **Model**: The name of the model.
542
+ - **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
543
+ - **Total params**: Total number of parameters in the model, including encoders and decoders.
544
+ - **GPU**: Name of the GPU model used for benchmarking.
545
+ - **Energy/image (J)**: Energy consumed per generated image in Joules.
546
+ - **Batch latency (s)**: Time taken to generate a batch of images in seconds.
547
+ - **Batch size**: Number of prompts/images in a batch.
548
+ - **Denoising steps**: Number of denoising steps used for the diffusion model.
549
+ - **Resolution**: Resolution of the generated image.
550
+
551
+ For more detailed information, please take a look at the **About** tab.
552
+ """
553
+ else:
554
+ text = """
555
+ Columns
556
+ - **Model**: The name of the model.
557
+ - **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repeatedly to gradually refine the image.
558
+ - **GPU model**: Name of the GPU model used for benchmarking.
559
+ - **Energy per image (Joules)**: Energy consumed for each generated image in Joules.
560
+
561
+ Checking "Show more technical details" above the table will reveal more detailed columns.
562
+ Also, for more detailed information, please take a look at the **About** tab.
563
+ """
564
+ return text
565
 
566
  def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
567
  return {"Batch latency (s)": (0.0, 60.0, 1.0, 10.0)}
 
573
  def get_tab_name(self) -> str:
574
  return "Diffusion Text to video"
575
 
576
+ def get_intro_text(self) -> str:
577
  text = """
578
  <h2>Diffusion text-to-video generation</h2></br>
579
 
580
  <p style="font-size: 16px">
581
+ Diffusion models generate videos that align with input text prompts.
582
+ Using <a href="https://ml.energy/zeus">Zeus</a> for energy measurement, we created a leaderboard for the energy consumption of Diffusion text-to-video.
583
  </p>
584
 
585
  <p style="font-size: 16px">
586
+ More models will be added over time. Stay tuned!
587
  </p>
588
  """
589
+ return text
590
+
591
+ def get_detail_text(self, detail_mode: bool) -> str:
592
+ if detail_mode:
593
+ text = """
594
+ Columns
595
+ - **Model**: The name of the model.
596
+ - **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
597
+ - **Total params**: Total number of parameters in the model, including encoders and decoders.
598
+ - **GPU**: Name of the GPU model used for benchmarking.
599
+ - **Energy/video (J)**: Energy consumed per generated video in Joules.
600
+ - **Batch latency (s)**: Time taken to generate a batch of videos in seconds.
601
+ - **Batch size**: Number of prompts/videos in a batch.
602
+ - **Denoising steps**: Number of denoising steps used for the diffusion model.
603
+ - **Frames**: Number of frames in the generated video.
604
+ - **Resolution**: Resolution of the generated video.
605
+
606
+ For more detailed information, please take a look at the **About** tab.
607
+ """
608
+ else:
609
+ text = """
610
+ Columns
611
+ - **Model**: The name of the model.
612
+ - **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repeatedly to gradually refine the video.
613
+ - **GPU model**: Name of the GPU model used for benchmarking.
614
+ - **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
615
+
616
+ Checking "Show more technical details" above the table will reveal more detailed columns.
617
+ Also, for more detailed information, please take a look at the **About** tab.
618
+ """
619
+ return text
620
 
621
  def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
622
  return {"Batch latency (s)": (0.0, 60.0, 1.0, 10.0)}
 
628
  def get_tab_name(self) -> str:
629
  return "Diffusion Image to video"
630
 
631
+ def get_intro_text(self) -> str:
632
  text = """
633
  <h2>Diffusion image-to-video generation</h2></br>
634
 
635
  <p style="font-size: 16px">
636
+ Diffusion models generate videos given an input image (and sometimes alongside text).
637
+ Using <a href="https://ml.energy/zeus">Zeus</a> for energy measurement, we created a leaderboard for the energy consumption of Diffusion image-to-video.
638
  </p>
639
 
640
  <p style="font-size: 16px">
641
+ More models will be added over time. Stay tuned!
642
  </p>
643
  """
644
+ return text
645
+
646
+ def get_detail_text(self, detail_mode: bool) -> str:
647
+ if detail_mode:
648
+ text = """
649
+ Columns
650
+ - **Model**: The name of the model.
651
+ - **Denoising params**: Number of parameters in the denoising module (e.g., UNet, Transformer).
652
+ - **Total params**: Total number of parameters in the model, including encoders and decoders.
653
+ - **GPU**: Name of the GPU model used for benchmarking.
654
+ - **Energy/video (J)**: Energy consumed per generated video in Joules.
655
+ - **Batch latency (s)**: Time taken to generate a batch of videos in seconds.
656
+ - **Batch size**: Number of prompts/videos in a batch.
657
+ - **Denoising steps**: Number of denoising steps used for the diffusion model.
658
+ - **Frames**: Number of frames in the generated video.
659
+ - **Resolution**: Resolution of the generated video.
660
+
661
+ For more detailed information, please take a look at the **About** tab.
662
+ """
663
+ else:
664
+ text = """
665
+ Columns
666
+ - **Model**: The name of the model.
667
+ - **Denoising parameters (Billions)**: Number of parameters in the diffusion model's (core) denoising module. This part of the model is run repeatedly to gradually refine the video.
668
+ - **GPU model**: Name of the GPU model used for benchmarking.
669
+ - **Energy per video (Joules)**: Energy consumed for each generated video in Joules.
670
+
671
+ Checking "Show more technical details" above the table will reveal more detailed columns.
672
+ Also, for more detailed information, please take a look at the **About** tab.
673
+ """
674
+ return text
675
 
676
  def get_benchmark_sliders(self) -> dict[str, tuple[float, float, float, float]]:
677
  return {"Batch latency (s)": (0.0, 120.0, 1.0, 45.0)}
 
718
  self.full_df = df
719
 
720
  # Default view of the table is to only show the first options.
721
+ self.set_filter_get_df(detail_mode=False)
722
 
723
  def _read_tables(self, data_dir: str) -> pd.DataFrame:
724
  """Read tables."""
 
777
  gr.Dropdown.update(choices=["None", *columns]),
778
  ]
779
 
780
+ def set_filter_get_df(self, detail_mode: bool, *filters) -> pd.DataFrame:
781
  """Set the current set of filters and return the filtered DataFrame."""
782
  # If the filter is empty, we default to the first choice for each key.
783
  if not filters:
 
794
  """Return the leaderboard's introduction text in HTML."""
795
  return """
796
  <div align="center">
797
+ <h2 style="color: #23d175">This is the legacy ML.ENERGY LLM leaderboard. This will be removed at the end of this year.</h2>
798
  </div>
799
 
800
  <h3>How much energy do modern Large Language Models (LLMs) consume for inference?</h3>
 
950
  #citation-header > div > span {
951
  font-size: 16px !important;
952
  }
953
+
954
+ /* Align everything in tables to the right. */
955
+ /* Not the best solution, but at least makes the numbers align. */
956
+ .tab-leaderboard span {
957
+ text-align: right;
958
+ }
959
  """
960
 
961
  # The app will not start without a controller address set.
 
1027
  # Colosseum event handlers
1028
  def on_load():
1029
  """Initialize the dataframe, shuffle the model preference dropdown choices."""
1030
+ dataframe = global_ltbm.set_filter_get_df(detail_mode=False)
1031
+ dataframes = [global_tbm.set_filter_get_df(detail_mode=False) for global_tbm in global_tbms]
1032
  return dataframe, *dataframes
1033
 
1034
 
 
1141
  ]
1142
 
1143
 
1144
+ def toggle_detail_mode_slider_visibility(detail_mode: bool, *sliders):
1145
+ return [detail_mode] + [gr.update(visible=detail_mode)] * len(sliders)
1146
+
1147
+
1148
+ def toggle_detail_mode_sync_tabs(detail_mode: bool, *checkboxes):
1149
+ return [gr.Checkbox.update(value=detail_mode)] * len(checkboxes) + [gr.Markdown.update(tbm.get_detail_text(detail_mode)) for tbm in global_tbms]
1150
+
1151
+
1152
  focus_prompt_input_js = """
1153
  function() {
1154
  for (let textarea of document.getElementsByTagName("textarea")) {
 
1163
  with gr.Blocks(css=custom_css) as block:
1164
  tbm = gr.State(global_ltbm) # type: ignore
1165
  local_tbms: list[TableManager] = [gr.State(global_tbm) for global_tbm in global_tbms] # type: ignore
1166
+ detail_mode = gr.State(False) # type: ignore
1167
 
1168
  with gr.Box():
1169
  gr.HTML(
 
1314
 
1315
  # Tab: Leaderboards.
1316
  dataframes = []
1317
+ all_detail_mode_checkboxes = []
1318
+ all_sliders = []
1319
+ all_detail_text_components = []
1320
  for global_tbm, local_tbm in zip(global_tbms, local_tbms):
1321
  with gr.Tab(global_tbm.get_tab_name()):
1322
  # Box: Introduction text.
1323
  with gr.Box():
1324
+ gr.Markdown(global_tbm.get_intro_text())
1325
+
1326
+ # Block: Checkboxes and sliders to select benchmarking parameters. A detail mode checkbox.
 
 
 
 
 
 
1327
  with gr.Row():
1328
  checkboxes: list[gr.CheckboxGroup] = []
1329
  for key, choices in global_tbm.get_benchmark_checkboxes().items():
 
1332
 
1333
  sliders: list[gr.Slider] = []
1334
  for key, (min_val, max_val, step, default) in global_tbm.get_benchmark_sliders().items():
1335
+ sliders.append(gr.Slider(minimum=min_val, maximum=max_val, value=default, step=step, label=key, visible=detail_mode.value))
1336
+ all_sliders.extend(sliders)
1337
+
1338
+ with gr.Row():
1339
+ detail_mode_checkbox = gr.Checkbox(label="Show more technical details", value=False)
1340
+ all_detail_mode_checkboxes.append(detail_mode_checkbox)
1341
 
1342
  # Block: Leaderboard table.
1343
  with gr.Row():
 
1345
  type="pandas",
1346
  elem_classes=["tab-leaderboard"],
1347
  interactive=False,
1348
+ max_rows=1000,
1349
  )
1350
  dataframes.append(dataframe)
1351
 
 
1354
  None, None, None, _js=dataframe_update_js, queue=False
1355
  )
1356
  # Table automatically updates when users check or uncheck any checkbox or move any slider.
1357
+ for element in [detail_mode_checkbox, *checkboxes, *sliders]:
1358
  element.change(
1359
  global_tbm.__class__.set_filter_get_df,
1360
+ inputs=[local_tbm, detail_mode, *checkboxes, *sliders],
1361
  outputs=dataframe,
1362
  queue=False,
1363
  )
1364
 
1365
  # Block: More details about the leaderboard.
1366
  with gr.Box():
1367
+ detail_text = global_tbm.get_detail_text(detail_mode=False)
1368
+ all_detail_text_components.append(gr.Markdown(detail_text))
 
 
 
 
 
1369
 
1370
  # Block: Leaderboard date.
1371
  with gr.Row():
 
1376
  # Tab: Legacy leaderboard.
1377
  with gr.Tab("LLM Leaderboard (legacy)"):
1378
  with gr.Box():
1379
+ gr.Markdown(global_ltbm.get_intro_text())
1380
 
1381
  # Block: Checkboxes to select benchmarking parameters.
1382
  with gr.Row():
 
1415
  with gr.Tab("About"):
1416
  gr.Markdown(open("docs/about.md").read())
1417
 
1418
+ # Detail mode toggling.
1419
+ for detail_mode_checkbox in all_detail_mode_checkboxes:
1420
+ detail_mode_checkbox.change(
1421
+ toggle_detail_mode_slider_visibility,
1422
+ inputs=[detail_mode_checkbox, *all_sliders],
1423
+ outputs=[detail_mode, *all_sliders],
1424
+ queue=False,
1425
+ )
1426
+ detail_mode_checkbox.change(
1427
+ toggle_detail_mode_sync_tabs,
1428
+ inputs=[detail_mode_checkbox, *all_detail_mode_checkboxes],
1429
+ outputs=[*all_detail_mode_checkboxes, *all_detail_text_components],
1430
+ queue=False,
1431
+ )
1432
+
1433
  # Citation
1434
  with gr.Accordion("📚 Citation", open=False, elem_id="citation-header"):
1435
  citation_text = open("docs/citation.bib").read()
data/diffusion/image-to-video/models.json CHANGED
@@ -2,22 +2,22 @@
2
  "ali-vilab/i2vgen-xl": {
3
  "url": "https://huggingface.co/ali-vilab/i2vgen-xl",
4
  "nickname": "I2VGen XL",
5
- "total_params": "2.5B",
6
- "denoising_params": "1.4B",
7
  "resolution": "1280x720"
8
  },
9
  "stabilityai/stable-video-diffusion-img2vid": {
10
  "url": "https://huggingface.co/stabilityai/stable-video-diffusion-img2vid",
11
  "nickname": "Stable Video Diffusion",
12
- "total_params": "2.3B",
13
- "denoising_params": "1.5B",
14
  "resolution": "1024x576"
15
  },
16
  "stabilityai/stable-video-diffusion-img2vid-xt": {
17
  "url": "https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt",
18
  "nickname": "Stable Video Diffusion xt",
19
- "total_params": "2.3B",
20
- "denoising_params": "1.5B",
21
  "resolution": "1024x576"
22
  }
23
  }
 
2
  "ali-vilab/i2vgen-xl": {
3
  "url": "https://huggingface.co/ali-vilab/i2vgen-xl",
4
  "nickname": "I2VGen XL",
5
+ "total_params": 2.5,
6
+ "denoising_params": 1.4,
7
  "resolution": "1280x720"
8
  },
9
  "stabilityai/stable-video-diffusion-img2vid": {
10
  "url": "https://huggingface.co/stabilityai/stable-video-diffusion-img2vid",
11
  "nickname": "Stable Video Diffusion",
12
+ "total_params": 2.3,
13
+ "denoising_params": 1.5,
14
  "resolution": "1024x576"
15
  },
16
  "stabilityai/stable-video-diffusion-img2vid-xt": {
17
  "url": "https://huggingface.co/stabilityai/stable-video-diffusion-img2vid-xt",
18
  "nickname": "Stable Video Diffusion xt",
19
+ "total_params": 2.3,
20
+ "denoising_params": 1.5,
21
  "resolution": "1024x576"
22
  }
23
  }
data/diffusion/text-to-image/models.json CHANGED
@@ -2,57 +2,57 @@
2
  "kandinsky-community/kandinsky-2-2-decoder": {
3
  "url": "https://huggingface.co/kandinsky-community/kandinsky-2-2-decoder",
4
  "nickname": "Kandinsky 2.2",
5
- "total_params": "4.9B",
6
- "denoising_params": "1.3B",
7
  "resolution": "512x512"
8
  },
9
  "kandinsky-community/kandinsky-3": {
10
  "url": "https://huggingface.co/kandinsky-community/kandinsky-3",
11
  "nickname": "Kandinsky 3",
12
- "total_params": "12.0B",
13
- "denoising_params": "3.1B",
14
  "resolution": "1024x1024"
15
  },
16
  "prompthero/openjourney-v4": {
17
  "url": "https://huggingface.co/prompthero/openjourney-v4",
18
  "nickname": "Openjourney V4",
19
- "total_params": "1.1B",
20
- "denoising_params": "0.9B",
21
  "resolution": "512x512"
22
  },
23
  "segmind/SSD-1B": {
24
  "url": "https://huggingface.co/segmind/SSD-1B",
25
  "nickname": "SSD 1B",
26
- "total_params": "2.2B",
27
- "denoising_params": "1.3B",
28
  "resolution": "1024x1024"
29
  },
30
  "stabilityai/sdxl-turbo": {
31
  "url": "https://huggingface.co/stabilityai/sdxl-turbo",
32
  "nickname": "Stable Diffusion XL Turbo",
33
- "total_params": "3.5B",
34
- "denoising_params": "2.6B",
35
  "resolution": "512x512"
36
  },
37
  "stabilityai/stable-diffusion-2-1": {
38
  "url": "https://huggingface.co/stabilityai/stable-diffusion-2-1",
39
  "nickname": "Stable Diffusion 2.1",
40
- "total_params": "1.3B",
41
- "denoising_params": "0.9B",
42
  "resolution": "768x768"
43
  },
44
  "stabilityai/stable-diffusion-3-medium-diffusers": {
45
  "url": "https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers",
46
  "nickname": "Stable Diffusion 3 Medium",
47
- "total_params": "7.7B",
48
- "denoising_params": "2.0B",
49
  "resolution": "1024x1024"
50
  },
51
  "stabilityai/stable-diffusion-xl-base-1.0": {
52
  "url": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
53
  "nickname": "Stable Diffusion XL Base 1.0",
54
- "total_params": "3.5B",
55
- "denoising_params": "2.6B",
56
  "resolution": "1024x1024"
57
  }
58
  }
 
2
  "kandinsky-community/kandinsky-2-2-decoder": {
3
  "url": "https://huggingface.co/kandinsky-community/kandinsky-2-2-decoder",
4
  "nickname": "Kandinsky 2.2",
5
+ "total_params": 4.9,
6
+ "denoising_params": 1.3,
7
  "resolution": "512x512"
8
  },
9
  "kandinsky-community/kandinsky-3": {
10
  "url": "https://huggingface.co/kandinsky-community/kandinsky-3",
11
  "nickname": "Kandinsky 3",
12
+ "total_params": 12.0,
13
+ "denoising_params": 3.1,
14
  "resolution": "1024x1024"
15
  },
16
  "prompthero/openjourney-v4": {
17
  "url": "https://huggingface.co/prompthero/openjourney-v4",
18
  "nickname": "Openjourney V4",
19
+ "total_params": 1.1,
20
+ "denoising_params": 0.9,
21
  "resolution": "512x512"
22
  },
23
  "segmind/SSD-1B": {
24
  "url": "https://huggingface.co/segmind/SSD-1B",
25
  "nickname": "SSD 1B",
26
+ "total_params": 2.2,
27
+ "denoising_params": 1.3,
28
  "resolution": "1024x1024"
29
  },
30
  "stabilityai/sdxl-turbo": {
31
  "url": "https://huggingface.co/stabilityai/sdxl-turbo",
32
  "nickname": "Stable Diffusion XL Turbo",
33
+ "total_params": 3.5,
34
+ "denoising_params": 2.6,
35
  "resolution": "512x512"
36
  },
37
  "stabilityai/stable-diffusion-2-1": {
38
  "url": "https://huggingface.co/stabilityai/stable-diffusion-2-1",
39
  "nickname": "Stable Diffusion 2.1",
40
+ "total_params": 1.3,
41
+ "denoising_params": 0.9,
42
  "resolution": "768x768"
43
  },
44
  "stabilityai/stable-diffusion-3-medium-diffusers": {
45
  "url": "https://huggingface.co/stabilityai/stable-diffusion-3-medium-diffusers",
46
  "nickname": "Stable Diffusion 3 Medium",
47
+ "total_params": 7.7,
48
+ "denoising_params": 2.0,
49
  "resolution": "1024x1024"
50
  },
51
  "stabilityai/stable-diffusion-xl-base-1.0": {
52
  "url": "https://huggingface.co/stabilityai/stable-diffusion-xl-base-1.0",
53
  "nickname": "Stable Diffusion XL Base 1.0",
54
+ "total_params": 3.5,
55
+ "denoising_params": 2.6,
56
  "resolution": "1024x1024"
57
  }
58
  }
data/diffusion/text-to-video/models.json CHANGED
@@ -2,15 +2,15 @@
2
  "ali-vilab/text-to-video-ms-1.7b": {
3
  "url": "https://huggingface.co/ali-vilab/text-to-video-ms-1.7b",
4
  "nickname": "ModelScope T2V",
5
- "total_params": "1.8B",
6
- "denoising_params": "1.4B",
7
  "resolution": "256x256"
8
  },
9
  "guoyww/animatediff-motion-adapter-v1-5-3": {
10
  "url": "https://huggingface.co/guoyww/animatediff-motion-adapter-v1-5-3",
11
  "nickname": "Animatediff",
12
- "total_params": "1.9B",
13
- "denoising_params": "1.3B",
14
  "resolution": "512x512"
15
  }
16
  }
 
2
  "ali-vilab/text-to-video-ms-1.7b": {
3
  "url": "https://huggingface.co/ali-vilab/text-to-video-ms-1.7b",
4
  "nickname": "ModelScope T2V",
5
+ "total_params": 1.8,
6
+ "denoising_params": 1.4,
7
  "resolution": "256x256"
8
  },
9
  "guoyww/animatediff-motion-adapter-v1-5-3": {
10
  "url": "https://huggingface.co/guoyww/animatediff-motion-adapter-v1-5-3",
11
  "nickname": "Animatediff",
12
+ "total_params": 1.9,
13
+ "denoising_params": 1.3,
14
  "resolution": "512x512"
15
  }
16
  }
data/llm_text_generation/chat/models.json CHANGED
@@ -2,71 +2,71 @@
2
  "google/gemma-2-27b-it": {
3
  "url": "https://huggingface.co/google/gemma-2-27b-it",
4
  "nickname": "Gemma 2 27B",
5
- "params": "27B"
6
  },
7
  "google/gemma-2-2b-it": {
8
  "url": "https://huggingface.co/google/gemma-2-2b-it",
9
  "nickname": "Gemma 2 2B",
10
- "params": "2B"
11
  },
12
  "google/gemma-2-9b-it": {
13
  "url": "https://huggingface.co/google/gemma-2-9b-it",
14
  "nickname": "Gemma 2 9B",
15
- "params": "9B"
16
  },
17
  "meta-llama/Meta-Llama-3.1-70B-Instruct": {
18
  "url": "https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct",
19
  "nickname": "Llama 3.1 70B",
20
- "params": "70B"
21
  },
22
  "meta-llama/Meta-Llama-3.1-405B-Instruct": {
23
  "url": "https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct",
24
  "nickname": "Llama 3.1 405B",
25
- "params": "405B"
26
  },
27
  "meta-llama/Meta-Llama-3.1-8B-Instruct": {
28
  "url": "https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct",
29
  "nickname": "Llama 3.1 8B",
30
- "params": "8B"
31
  },
32
  "microsoft/Phi-3-medium-4k-instruct": {
33
  "url": "https://huggingface.co/microsoft/Phi-3-medium-4k-instruct",
34
  "nickname": "Phi 3 Medium",
35
- "params": "14B"
36
  },
37
  "microsoft/Phi-3-mini-4k-instruct": {
38
  "url": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
39
  "nickname": "Phi 3 Mini",
40
- "params": "4B"
41
  },
42
  "microsoft/Phi-3-small-8k-instruct": {
43
  "url": "https://huggingface.co/microsoft/Phi-3-small-8k-instruct",
44
  "nickname": "Phi 3 Small",
45
- "params": "7B"
46
  },
47
  "mistralai/Mistral-7B-Instruct-v0.3": {
48
  "url": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3",
49
  "nickname": "Mistral 7B",
50
- "params": "7B"
51
  },
52
  "mistralai/Mistral-Large-Instruct-2407": {
53
  "url": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407",
54
  "nickname": "Mistral Large",
55
- "params": "123B"
56
  },
57
  "mistralai/Mistral-Nemo-Instruct-2407": {
58
  "url": "https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407",
59
  "nickname": "Mistral Nemo",
60
- "params": "12B"
61
  },
62
  "mistralai/Mixtral-8x22B-Instruct-v0.1": {
63
  "url": "https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1",
64
  "nickname": "Mixtral 8x22B",
65
- "params": "141B"
66
  },
67
  "mistralai/Mixtral-8x7B-Instruct-v0.1": {
68
  "url": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
69
  "nickname": "Mixtral 8x7B",
70
- "params": "47B"
71
  }
72
  }
 
2
  "google/gemma-2-27b-it": {
3
  "url": "https://huggingface.co/google/gemma-2-27b-it",
4
  "nickname": "Gemma 2 27B",
5
+ "params": 27
6
  },
7
  "google/gemma-2-2b-it": {
8
  "url": "https://huggingface.co/google/gemma-2-2b-it",
9
  "nickname": "Gemma 2 2B",
10
+ "params": 2
11
  },
12
  "google/gemma-2-9b-it": {
13
  "url": "https://huggingface.co/google/gemma-2-9b-it",
14
  "nickname": "Gemma 2 9B",
15
+ "params": 9
16
  },
17
  "meta-llama/Meta-Llama-3.1-70B-Instruct": {
18
  "url": "https://huggingface.co/meta-llama/Meta-Llama-3.1-70B-Instruct",
19
  "nickname": "Llama 3.1 70B",
20
+ "params": 70
21
  },
22
  "meta-llama/Meta-Llama-3.1-405B-Instruct": {
23
  "url": "https://huggingface.co/meta-llama/Meta-Llama-3.1-405B-Instruct",
24
  "nickname": "Llama 3.1 405B",
25
+ "params": 405
26
  },
27
  "meta-llama/Meta-Llama-3.1-8B-Instruct": {
28
  "url": "https://huggingface.co/meta-llama/Meta-Llama-3.1-8B-Instruct",
29
  "nickname": "Llama 3.1 8B",
30
+ "params": 8
31
  },
32
  "microsoft/Phi-3-medium-4k-instruct": {
33
  "url": "https://huggingface.co/microsoft/Phi-3-medium-4k-instruct",
34
  "nickname": "Phi 3 Medium",
35
+ "params": 14
36
  },
37
  "microsoft/Phi-3-mini-4k-instruct": {
38
  "url": "https://huggingface.co/microsoft/Phi-3-mini-4k-instruct",
39
  "nickname": "Phi 3 Mini",
40
+ "params": 4
41
  },
42
  "microsoft/Phi-3-small-8k-instruct": {
43
  "url": "https://huggingface.co/microsoft/Phi-3-small-8k-instruct",
44
  "nickname": "Phi 3 Small",
45
+ "params": 7
46
  },
47
  "mistralai/Mistral-7B-Instruct-v0.3": {
48
  "url": "https://huggingface.co/mistralai/Mistral-7B-Instruct-v0.3",
49
  "nickname": "Mistral 7B",
50
+ "params": 7
51
  },
52
  "mistralai/Mistral-Large-Instruct-2407": {
53
  "url": "https://huggingface.co/mistralai/Mistral-Large-Instruct-2407",
54
  "nickname": "Mistral Large",
55
+ "params": 123
56
  },
57
  "mistralai/Mistral-Nemo-Instruct-2407": {
58
  "url": "https://huggingface.co/mistralai/Mistral-Nemo-Instruct-2407",
59
  "nickname": "Mistral Nemo",
60
+ "params": 12
61
  },
62
  "mistralai/Mixtral-8x22B-Instruct-v0.1": {
63
  "url": "https://huggingface.co/mistralai/Mixtral-8x22B-Instruct-v0.1",
64
  "nickname": "Mixtral 8x22B",
65
+ "params": 141
66
  },
67
  "mistralai/Mixtral-8x7B-Instruct-v0.1": {
68
  "url": "https://huggingface.co/mistralai/Mixtral-8x7B-Instruct-v0.1",
69
  "nickname": "Mixtral 8x7B",
70
+ "params": 47
71
  }
72
  }
data/llm_text_generation/code/models.json CHANGED
@@ -2,46 +2,46 @@
2
  "bigcode/starcoder2-15b": {
3
  "url": "https://huggingface.co/bigcode/starcoder2-15b",
4
  "nickname": "Starcoder2 15B",
5
- "params": "15B"
6
  },
7
  "bigcode/starcoder2-3b": {
8
  "url": "https://huggingface.co/bigcode/starcoder2-3b",
9
  "nickname": "Starcoder2 3B",
10
- "params": "3B"
11
  },
12
  "bigcode/starcoder2-7b": {
13
  "url": "https://huggingface.co/bigcode/starcoder2-7b",
14
  "nickname": "Starcoder2 7B",
15
- "params": "7B"
16
  },
17
  "codellama/CodeLlama-13b-hf": {
18
  "url": "https://huggingface.co/codellama/CodeLlama-13b-hf",
19
  "nickname": "CodeLlama 13B",
20
- "params": "13B"
21
  },
22
  "codellama/CodeLlama-34b-hf": {
23
  "url": "https://huggingface.co/codellama/CodeLlama-34b-hf",
24
  "nickname": "CodeLlama 34B",
25
- "params": "34B"
26
  },
27
  "codellama/CodeLlama-70b-hf": {
28
  "url": "https://huggingface.co/codellama/CodeLlama-70b-hf",
29
  "nickname": "CodeLlama 70B",
30
- "params": "70B"
31
  },
32
  "codellama/CodeLlama-7b-hf": {
33
  "url": "https://huggingface.co/codellama/CodeLlama-7b-hf",
34
  "nickname": "CodeLlama 7B",
35
- "params": "7B"
36
  },
37
  "google/codegemma-1.1-2b": {
38
  "url": "https://huggingface.co/google/codegemma-1.1-2b",
39
  "nickname": "CodeGemma 2B",
40
- "params": "2B"
41
  },
42
  "google/codegemma-7b": {
43
  "url": "https://huggingface.co/google/codegemma-7b",
44
  "nickname": "CodeGemma 7B",
45
- "params": "7B"
46
  }
47
  }
 
2
  "bigcode/starcoder2-15b": {
3
  "url": "https://huggingface.co/bigcode/starcoder2-15b",
4
  "nickname": "Starcoder2 15B",
5
+ "params": 15
6
  },
7
  "bigcode/starcoder2-3b": {
8
  "url": "https://huggingface.co/bigcode/starcoder2-3b",
9
  "nickname": "Starcoder2 3B",
10
+ "params": 3
11
  },
12
  "bigcode/starcoder2-7b": {
13
  "url": "https://huggingface.co/bigcode/starcoder2-7b",
14
  "nickname": "Starcoder2 7B",
15
+ "params": 7
16
  },
17
  "codellama/CodeLlama-13b-hf": {
18
  "url": "https://huggingface.co/codellama/CodeLlama-13b-hf",
19
  "nickname": "CodeLlama 13B",
20
+ "params": 13
21
  },
22
  "codellama/CodeLlama-34b-hf": {
23
  "url": "https://huggingface.co/codellama/CodeLlama-34b-hf",
24
  "nickname": "CodeLlama 34B",
25
+ "params": 34
26
  },
27
  "codellama/CodeLlama-70b-hf": {
28
  "url": "https://huggingface.co/codellama/CodeLlama-70b-hf",
29
  "nickname": "CodeLlama 70B",
30
+ "params": 70
31
  },
32
  "codellama/CodeLlama-7b-hf": {
33
  "url": "https://huggingface.co/codellama/CodeLlama-7b-hf",
34
  "nickname": "CodeLlama 7B",
35
+ "params": 7
36
  },
37
  "google/codegemma-1.1-2b": {
38
  "url": "https://huggingface.co/google/codegemma-1.1-2b",
39
  "nickname": "CodeGemma 2B",
40
+ "params": 2
41
  },
42
  "google/codegemma-7b": {
43
  "url": "https://huggingface.co/google/codegemma-7b",
44
  "nickname": "CodeGemma 7B",
45
+ "params": 7
46
  }
47
  }
data/mllm_text_generation/chat/models.json CHANGED
@@ -2,31 +2,31 @@
2
  "facebook/chameleon-30b": {
3
  "url": "https://huggingface.co/facebook/chameleon-30b",
4
  "nickname": "Chameleon 30B",
5
- "params": "34B"
6
  },
7
  "facebook/chameleon-7b": {
8
  "url": "https://huggingface.co/facebook/chameleon-7b",
9
  "nickname": "Chameleon 7B",
10
- "params": "7B"
11
  },
12
  "llava-hf/llama3-llava-next-8b-hf": {
13
  "url": "https://huggingface.co/llava-hf/llama3-llava-next-8b-hf",
14
  "nickname": "LLaVA NeXT 8B",
15
- "params": "8B"
16
  },
17
  "llava-hf/llava-1.5-13b-hf": {
18
  "url": "https://huggingface.co/llava-hf/llava-1.5-13b-hf",
19
  "nickname": "LLaVA 1.5 13B",
20
- "params": "13B"
21
  },
22
  "llava-hf/llava-1.5-7b-hf": {
23
  "url": "https://huggingface.co/llava-hf/llava-1.5-7b-hf",
24
  "nickname": "LLaVA 1.5 7B",
25
- "params": "7B"
26
  },
27
  "microsoft/Phi-3-vision-128k-instruct": {
28
  "url": "https://huggingface.co/microsoft/Phi-3-vision-128k-instruct",
29
  "nickname": "Phi 3 Vision",
30
- "params": "4B"
31
  }
32
  }
 
2
  "facebook/chameleon-30b": {
3
  "url": "https://huggingface.co/facebook/chameleon-30b",
4
  "nickname": "Chameleon 30B",
5
+ "params": 34
6
  },
7
  "facebook/chameleon-7b": {
8
  "url": "https://huggingface.co/facebook/chameleon-7b",
9
  "nickname": "Chameleon 7B",
10
+ "params": 7
11
  },
12
  "llava-hf/llama3-llava-next-8b-hf": {
13
  "url": "https://huggingface.co/llava-hf/llama3-llava-next-8b-hf",
14
  "nickname": "LLaVA NeXT 8B",
15
+ "params": 8
16
  },
17
  "llava-hf/llava-1.5-13b-hf": {
18
  "url": "https://huggingface.co/llava-hf/llava-1.5-13b-hf",
19
  "nickname": "LLaVA 1.5 13B",
20
+ "params": 13
21
  },
22
  "llava-hf/llava-1.5-7b-hf": {
23
  "url": "https://huggingface.co/llava-hf/llava-1.5-7b-hf",
24
  "nickname": "LLaVA 1.5 7B",
25
+ "params": 7
26
  },
27
  "microsoft/Phi-3-vision-128k-instruct": {
28
  "url": "https://huggingface.co/microsoft/Phi-3-vision-128k-instruct",
29
  "nickname": "Phi 3 Vision",
30
+ "params": 4
31
  }
32
  }