Tom Aarsen committed on
Commit
ab565ba
·
1 Parent(s): e2b41c8

Add search bar/filtering; always show Model Size

Browse files
Files changed (1) hide show
  1. app.py +148 -61
app.py CHANGED
@@ -1,6 +1,7 @@
1
- from functools import partial
2
  import json
3
  import os
 
4
 
5
  from datasets import load_dataset
6
  import gradio as gr
@@ -1098,7 +1099,7 @@ def add_rank(df):
1098
  if len(cols_to_rank) == 1:
1099
  df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
1100
  else:
1101
- df.insert(1, "Average", df[cols_to_rank].mean(axis=1, skipna=False))
1102
  df.sort_values("Average", ascending=False, inplace=True)
1103
  df.insert(0, "Rank", list(range(1, len(df) + 1)))
1104
  df = df.round(2)
@@ -1106,7 +1107,7 @@ def add_rank(df):
1106
  df.fillna("", inplace=True)
1107
  return df
1108
 
1109
- def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=False, task_to_metric=TASK_TO_METRIC, rank=True):
1110
  api = HfApi()
1111
  models = api.list_models(filter="mteb")
1112
  # Initialize list to models that we cannot fetch metadata from
@@ -1169,6 +1170,8 @@ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_
1169
  except:
1170
  pass
1171
  df_list.append(out)
 
 
1172
  df = pd.DataFrame(df_list)
1173
  # If there are any models that are the same, merge them
1174
  # E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
@@ -1217,26 +1220,26 @@ def get_mteb_average():
1217
 
1218
  DATA_OVERALL = DATA_OVERALL.round(2)
1219
 
1220
- DATA_CLASSIFICATION_EN = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_CLASSIFICATION])
1221
  # Only keep rows with at least one score in addition to the "Model" & rank column
1222
- DATA_CLASSIFICATION_EN = DATA_CLASSIFICATION_EN[DATA_CLASSIFICATION_EN.iloc[:, 2:].ne("").any(axis=1)]
1223
 
1224
- DATA_CLUSTERING = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_CLUSTERING])
1225
- DATA_CLUSTERING = DATA_CLUSTERING[DATA_CLUSTERING.iloc[:, 2:].ne("").any(axis=1)]
1226
 
1227
- DATA_PAIR_CLASSIFICATION = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION])
1228
- DATA_PAIR_CLASSIFICATION = DATA_PAIR_CLASSIFICATION[DATA_PAIR_CLASSIFICATION.iloc[:, 2:].ne("").any(axis=1)]
1229
 
1230
- DATA_RERANKING = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_RERANKING])
1231
- DATA_RERANKING = DATA_RERANKING[DATA_RERANKING.iloc[:, 2:].ne("").any(axis=1)]
1232
 
1233
- DATA_RETRIEVAL = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_RETRIEVAL])
1234
- DATA_RETRIEVAL = DATA_RETRIEVAL[DATA_RETRIEVAL.iloc[:, 2:].ne("").any(axis=1)]
1235
 
1236
- DATA_STS_EN = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_STS])
1237
- DATA_STS_EN = DATA_STS_EN[DATA_STS_EN.iloc[:, 2:].ne("").any(axis=1)]
1238
 
1239
- DATA_SUMMARIZATION = add_rank(DATA_OVERALL[["Model"] + TASK_LIST_SUMMARIZATION])
1240
  DATA_SUMMARIZATION = DATA_SUMMARIZATION[DATA_SUMMARIZATION.iloc[:, 1:].ne("").any(axis=1)]
1241
 
1242
  # Fill NaN after averaging
@@ -1279,24 +1282,24 @@ def get_mteb_average_zh():
1279
 
1280
  DATA_OVERALL_ZH = DATA_OVERALL_ZH.round(2)
1281
 
1282
- DATA_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_CLASSIFICATION_ZH])
1283
  # Only keep rows with at least one score in addition to the "Model" & rank column
1284
- DATA_CLASSIFICATION_ZH = DATA_CLASSIFICATION_ZH[DATA_CLASSIFICATION_ZH.iloc[:, 2:].ne("").any(axis=1)]
1285
 
1286
- DATA_CLUSTERING_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_CLUSTERING_ZH])
1287
- DATA_CLUSTERING_ZH = DATA_CLUSTERING_ZH[DATA_CLUSTERING_ZH.iloc[:, 2:].ne("").any(axis=1)]
1288
 
1289
- DATA_PAIR_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_ZH])
1290
- DATA_PAIR_CLASSIFICATION_ZH = DATA_PAIR_CLASSIFICATION_ZH[DATA_PAIR_CLASSIFICATION_ZH.iloc[:, 2:].ne("").any(axis=1)]
1291
 
1292
- DATA_RERANKING_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_RERANKING_ZH])
1293
- DATA_RERANKING_ZH = DATA_RERANKING_ZH[DATA_RERANKING_ZH.iloc[:, 2:].ne("").any(axis=1)]
1294
 
1295
- DATA_RETRIEVAL_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_RETRIEVAL_ZH])
1296
- DATA_RETRIEVAL_ZH = DATA_RETRIEVAL_ZH[DATA_RETRIEVAL_ZH.iloc[:, 2:].ne("").any(axis=1)]
1297
 
1298
- DATA_STS_ZH = add_rank(DATA_OVERALL_ZH[["Model"] + TASK_LIST_STS_ZH])
1299
- DATA_STS_ZH = DATA_STS_ZH[DATA_STS_ZH.iloc[:, 2:].ne("").any(axis=1)]
1300
 
1301
  # Fill NaN after averaging
1302
  DATA_OVERALL_ZH.fillna("", inplace=True)
@@ -1339,25 +1342,25 @@ def get_mteb_average_fr():
1339
  DATA_OVERALL_FR.insert(0, "Rank", list(range(1, len(DATA_OVERALL_FR) + 1)))
1340
  DATA_OVERALL_FR = DATA_OVERALL_FR.round(2)
1341
 
1342
- DATA_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_CLASSIFICATION_FR])
1343
- DATA_CLASSIFICATION_FR = DATA_CLASSIFICATION_FR[DATA_CLASSIFICATION_FR.iloc[:, 2:].ne("").any(axis=1)]
1344
 
1345
- DATA_CLUSTERING_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_CLUSTERING_FR])
1346
- DATA_CLUSTERING_FR = DATA_CLUSTERING_FR[DATA_CLUSTERING_FR.iloc[:, 2:].ne("").any(axis=1)]
1347
 
1348
- DATA_PAIR_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_FR])
1349
- DATA_PAIR_CLASSIFICATION_FR = DATA_PAIR_CLASSIFICATION_FR[DATA_PAIR_CLASSIFICATION_FR.iloc[:, 2:].ne("").any(axis=1)]
1350
 
1351
- DATA_RERANKING_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_RERANKING_FR])
1352
- DATA_RERANKING_FR = DATA_RERANKING_FR[DATA_RERANKING_FR.iloc[:, 2:].ne("").any(axis=1)]
1353
 
1354
- DATA_RETRIEVAL_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_RETRIEVAL_FR])
1355
- DATA_RETRIEVAL_FR = DATA_RETRIEVAL_FR[DATA_RETRIEVAL_FR.iloc[:, 2:].ne("").any(axis=1)]
1356
 
1357
- DATA_STS_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_STS_FR])
1358
- DATA_STS_FR = DATA_STS_FR[DATA_STS_FR.iloc[:, 2:].ne("").any(axis=1)]
1359
 
1360
- DATA_SUMMARIZATION_FR = add_rank(DATA_OVERALL_FR[["Model"] + TASK_LIST_SUMMARIZATION_FR])
1361
  DATA_SUMMARIZATION_FR = DATA_SUMMARIZATION_FR[DATA_SUMMARIZATION_FR.iloc[:, 1:].ne("").any(axis=1)]
1362
 
1363
  # Fill NaN after averaging
@@ -1398,21 +1401,21 @@ def get_mteb_average_pl():
1398
 
1399
  DATA_OVERALL_PL = DATA_OVERALL_PL.round(2)
1400
 
1401
- DATA_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_CLASSIFICATION_PL])
1402
  # Only keep rows with at least one score in addition to the "Model" & rank column
1403
- DATA_CLASSIFICATION_PL = DATA_CLASSIFICATION_PL[DATA_CLASSIFICATION_PL.iloc[:, 2:].ne("").any(axis=1)]
1404
 
1405
- DATA_CLUSTERING_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_CLUSTERING_PL])
1406
- DATA_CLUSTERING_PL = DATA_CLUSTERING_PL[DATA_CLUSTERING_PL.iloc[:, 2:].ne("").any(axis=1)]
1407
 
1408
- DATA_PAIR_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_PAIR_CLASSIFICATION_PL])
1409
- DATA_PAIR_CLASSIFICATION_PL = DATA_PAIR_CLASSIFICATION_PL[DATA_PAIR_CLASSIFICATION_PL.iloc[:, 2:].ne("").any(axis=1)]
1410
 
1411
- DATA_RETRIEVAL_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_RETRIEVAL_PL])
1412
- DATA_RETRIEVAL_PL = DATA_RETRIEVAL_PL[DATA_RETRIEVAL_PL.iloc[:, 2:].ne("").any(axis=1)]
1413
 
1414
- DATA_STS_PL = add_rank(DATA_OVERALL_PL[["Model"] + TASK_LIST_STS_PL])
1415
- DATA_STS_PL = DATA_STS_PL[DATA_STS_PL.iloc[:, 2:].ne("").any(axis=1)]
1416
 
1417
  # Fill NaN after averaging
1418
  DATA_OVERALL_PL.fillna("", inplace=True)
@@ -1426,14 +1429,14 @@ get_mteb_average()
1426
  get_mteb_average_fr()
1427
  get_mteb_average_pl()
1428
  get_mteb_average_zh()
1429
- DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
1430
- DATA_BITEXT_MINING_DA = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_DA)
1431
- DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)
1432
- DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_NB)
1433
- DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)
1434
- DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)
1435
- DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)
1436
- DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)
1437
 
1438
  # Exact, add all non-nan integer values for every dataset
1439
  NUM_SCORES = 0
@@ -1476,7 +1479,7 @@ for d in [
1476
  DATA_SUMMARIZATION_FR,
1477
  ]:
1478
  # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum()
1479
- cols_to_ignore = 3 if "Average" in d.columns else 2
1480
  # Count number of scores including only non-nan floats & excluding the rank column
1481
  NUM_SCORES += d.iloc[:, cols_to_ignore:].notna().sum().sum()
1482
  # Exclude rank & model name column (first two); Do not count different language versions as different datasets
@@ -1491,6 +1494,7 @@ NUM_MODELS = len(set(MODELS))
1491
  # 1. Force headers to wrap
1492
  # 2. Force model column (maximum) width
1493
  # 3. Prevent model column from overflowing, scroll instead
 
1494
  css = """
1495
  table > thead {
1496
  white-space: normal
@@ -1503,6 +1507,10 @@ table {
1503
  table > tbody > tr > td:nth-child(2) > div {
1504
  overflow-x: auto
1505
  }
 
 
 
 
1506
  """
1507
 
1508
  """
@@ -1822,6 +1830,7 @@ data = {
1822
  }
1823
 
1824
  dataframes = []
 
1825
  tabs = []
1826
 
1827
  # The following JavaScript function updates the URL parameters based on the selected task and language
@@ -1854,6 +1863,57 @@ def update_url_language(event: gr.SelectData, current_task_language: dict, langu
1854
  language_per_task[current_task_language["task"]] = event.target.id
1855
  return current_task_language, language_per_task
1856
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1857
  with gr.Blocks(css=css) as block:
1858
 
1859
  # Store the current task and language for updating the URL. This is a bit hacky, but it works
@@ -1865,6 +1925,26 @@ with gr.Blocks(css=css) as block:
1865
  Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb#leaderboard" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
1866
  """)
1867
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1868
  with gr.Tabs() as outer_tabs:
1869
  # Store the tabs for updating them on load based on URL parameters
1870
  tabs.append(outer_tabs)
@@ -1901,9 +1981,12 @@ with gr.Blocks(css=css) as block:
1901
 
1902
  with gr.Row():
1903
  datatype = ["number", "markdown"] + ["number"] * len(item["data"])
1904
- dataframe = gr.Dataframe(item["data"], datatype=datatype, type="pandas", height=600)
1905
  dataframes.append(dataframe)
1906
 
 
 
 
1907
  with gr.Row():
1908
  refresh_button = gr.Button("Refresh")
1909
  refresh_button.click(item["refresh"], inputs=None, outputs=dataframe)
@@ -1950,6 +2033,10 @@ with gr.Blocks(css=css) as block:
1950
 
1951
  block.load(set_tabs_on_load, inputs=[], outputs=tabs + [current_task_language, language_per_task])
1952
 
 
 
 
 
1953
  block.queue(max_size=10)
1954
  block.launch()
1955
 
 
1
+ from functools import partial, reduce
2
  import json
3
  import os
4
+ import re
5
 
6
  from datasets import load_dataset
7
  import gradio as gr
 
1099
  if len(cols_to_rank) == 1:
1100
  df.sort_values(cols_to_rank[0], ascending=False, inplace=True)
1101
  else:
1102
+ df.insert(len(df.columns) - len(cols_to_rank), "Average", df[cols_to_rank].mean(axis=1, skipna=False))
1103
  df.sort_values("Average", ascending=False, inplace=True)
1104
  df.insert(0, "Rank", list(range(1, len(df) + 1)))
1105
  df = df.round(2)
 
1107
  df.fillna("", inplace=True)
1108
  return df
1109
 
1110
+ def get_mteb_data(tasks=["Clustering"], langs=[], datasets=[], fillna=True, add_emb_dim=True, task_to_metric=TASK_TO_METRIC, rank=True):
1111
  api = HfApi()
1112
  models = api.list_models(filter="mteb")
1113
  # Initialize list to models that we cannot fetch metadata from
 
1170
  except:
1171
  pass
1172
  df_list.append(out)
1173
+ if len(df_list) >= 1:
1174
+ break
1175
  df = pd.DataFrame(df_list)
1176
  # If there are any models that are the same, merge them
1177
  # E.g. if out["Model"] has the same value in two places, merge & take whichever one is not NaN else just take the first one
 
1220
 
1221
  DATA_OVERALL = DATA_OVERALL.round(2)
1222
 
1223
+ DATA_CLASSIFICATION_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLASSIFICATION])
1224
  # Only keep rows with at least one score in addition to the "Rank", "Model" & "Model Size" columns
1225
+ DATA_CLASSIFICATION_EN = DATA_CLASSIFICATION_EN[DATA_CLASSIFICATION_EN.iloc[:, 3:].ne("").any(axis=1)]
1226
 
1227
+ DATA_CLUSTERING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLUSTERING])
1228
+ DATA_CLUSTERING = DATA_CLUSTERING[DATA_CLUSTERING.iloc[:, 3:].ne("").any(axis=1)]
1229
 
1230
+ DATA_PAIR_CLASSIFICATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_PAIR_CLASSIFICATION])
1231
+ DATA_PAIR_CLASSIFICATION = DATA_PAIR_CLASSIFICATION[DATA_PAIR_CLASSIFICATION.iloc[:, 3:].ne("").any(axis=1)]
1232
 
1233
+ DATA_RERANKING = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RERANKING])
1234
+ DATA_RERANKING = DATA_RERANKING[DATA_RERANKING.iloc[:, 3:].ne("").any(axis=1)]
1235
 
1236
+ DATA_RETRIEVAL = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RETRIEVAL])
1237
+ DATA_RETRIEVAL = DATA_RETRIEVAL[DATA_RETRIEVAL.iloc[:, 3:].ne("").any(axis=1)]
1238
 
1239
+ DATA_STS_EN = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_STS])
1240
+ DATA_STS_EN = DATA_STS_EN[DATA_STS_EN.iloc[:, 3:].ne("").any(axis=1)]
1241
 
1242
+ DATA_SUMMARIZATION = add_rank(DATA_OVERALL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_SUMMARIZATION])
1243
  DATA_SUMMARIZATION = DATA_SUMMARIZATION[DATA_SUMMARIZATION.iloc[:, 1:].ne("").any(axis=1)]
1244
 
1245
  # Fill NaN after averaging
 
1282
 
1283
  DATA_OVERALL_ZH = DATA_OVERALL_ZH.round(2)
1284
 
1285
+ DATA_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLASSIFICATION_ZH])
1286
  # Only keep rows with at least one score in addition to the "Rank", "Model" & "Model Size" columns
1287
+ DATA_CLASSIFICATION_ZH = DATA_CLASSIFICATION_ZH[DATA_CLASSIFICATION_ZH.iloc[:, 3:].ne("").any(axis=1)]
1288
 
1289
+ DATA_CLUSTERING_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLUSTERING_ZH])
1290
+ DATA_CLUSTERING_ZH = DATA_CLUSTERING_ZH[DATA_CLUSTERING_ZH.iloc[:, 3:].ne("").any(axis=1)]
1291
 
1292
+ DATA_PAIR_CLASSIFICATION_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_PAIR_CLASSIFICATION_ZH])
1293
+ DATA_PAIR_CLASSIFICATION_ZH = DATA_PAIR_CLASSIFICATION_ZH[DATA_PAIR_CLASSIFICATION_ZH.iloc[:, 3:].ne("").any(axis=1)]
1294
 
1295
+ DATA_RERANKING_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RERANKING_ZH])
1296
+ DATA_RERANKING_ZH = DATA_RERANKING_ZH[DATA_RERANKING_ZH.iloc[:, 3:].ne("").any(axis=1)]
1297
 
1298
+ DATA_RETRIEVAL_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RETRIEVAL_ZH])
1299
+ DATA_RETRIEVAL_ZH = DATA_RETRIEVAL_ZH[DATA_RETRIEVAL_ZH.iloc[:, 3:].ne("").any(axis=1)]
1300
 
1301
+ DATA_STS_ZH = add_rank(DATA_OVERALL_ZH[["Model", "Model Size (Million Parameters)"] + TASK_LIST_STS_ZH])
1302
+ DATA_STS_ZH = DATA_STS_ZH[DATA_STS_ZH.iloc[:, 3:].ne("").any(axis=1)]
1303
 
1304
  # Fill NaN after averaging
1305
  DATA_OVERALL_ZH.fillna("", inplace=True)
 
1342
  DATA_OVERALL_FR.insert(0, "Rank", list(range(1, len(DATA_OVERALL_FR) + 1)))
1343
  DATA_OVERALL_FR = DATA_OVERALL_FR.round(2)
1344
 
1345
+ DATA_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLASSIFICATION_FR])
1346
+ DATA_CLASSIFICATION_FR = DATA_CLASSIFICATION_FR[DATA_CLASSIFICATION_FR.iloc[:, 3:].ne("").any(axis=1)]
1347
 
1348
+ DATA_CLUSTERING_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLUSTERING_FR])
1349
+ DATA_CLUSTERING_FR = DATA_CLUSTERING_FR[DATA_CLUSTERING_FR.iloc[:, 3:].ne("").any(axis=1)]
1350
 
1351
+ DATA_PAIR_CLASSIFICATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_PAIR_CLASSIFICATION_FR])
1352
+ DATA_PAIR_CLASSIFICATION_FR = DATA_PAIR_CLASSIFICATION_FR[DATA_PAIR_CLASSIFICATION_FR.iloc[:, 3:].ne("").any(axis=1)]
1353
 
1354
+ DATA_RERANKING_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RERANKING_FR])
1355
+ DATA_RERANKING_FR = DATA_RERANKING_FR[DATA_RERANKING_FR.iloc[:, 3:].ne("").any(axis=1)]
1356
 
1357
+ DATA_RETRIEVAL_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RETRIEVAL_FR])
1358
+ DATA_RETRIEVAL_FR = DATA_RETRIEVAL_FR[DATA_RETRIEVAL_FR.iloc[:, 3:].ne("").any(axis=1)]
1359
 
1360
+ DATA_STS_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_STS_FR])
1361
+ DATA_STS_FR = DATA_STS_FR[DATA_STS_FR.iloc[:, 3:].ne("").any(axis=1)]
1362
 
1363
+ DATA_SUMMARIZATION_FR = add_rank(DATA_OVERALL_FR[["Model", "Model Size (Million Parameters)"] + TASK_LIST_SUMMARIZATION_FR])
1364
  DATA_SUMMARIZATION_FR = DATA_SUMMARIZATION_FR[DATA_SUMMARIZATION_FR.iloc[:, 1:].ne("").any(axis=1)]
1365
 
1366
  # Fill NaN after averaging
 
1401
 
1402
  DATA_OVERALL_PL = DATA_OVERALL_PL.round(2)
1403
 
1404
+ DATA_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLASSIFICATION_PL])
1405
  # Only keep rows with at least one score in addition to the "Rank", "Model" & "Model Size" columns
1406
+ DATA_CLASSIFICATION_PL = DATA_CLASSIFICATION_PL[DATA_CLASSIFICATION_PL.iloc[:, 3:].ne("").any(axis=1)]
1407
 
1408
+ DATA_CLUSTERING_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_CLUSTERING_PL])
1409
+ DATA_CLUSTERING_PL = DATA_CLUSTERING_PL[DATA_CLUSTERING_PL.iloc[:, 3:].ne("").any(axis=1)]
1410
 
1411
+ DATA_PAIR_CLASSIFICATION_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_PAIR_CLASSIFICATION_PL])
1412
+ DATA_PAIR_CLASSIFICATION_PL = DATA_PAIR_CLASSIFICATION_PL[DATA_PAIR_CLASSIFICATION_PL.iloc[:, 3:].ne("").any(axis=1)]
1413
 
1414
+ DATA_RETRIEVAL_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_RETRIEVAL_PL])
1415
+ DATA_RETRIEVAL_PL = DATA_RETRIEVAL_PL[DATA_RETRIEVAL_PL.iloc[:, 3:].ne("").any(axis=1)]
1416
 
1417
+ DATA_STS_PL = add_rank(DATA_OVERALL_PL[["Model", "Model Size (Million Parameters)"] + TASK_LIST_STS_PL])
1418
+ DATA_STS_PL = DATA_STS_PL[DATA_STS_PL.iloc[:, 3:].ne("").any(axis=1)]
1419
 
1420
  # Fill NaN after averaging
1421
  DATA_OVERALL_PL.fillna("", inplace=True)
 
1429
  get_mteb_average_fr()
1430
  get_mteb_average_pl()
1431
  get_mteb_average_zh()
1432
+ DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_BITEXT_MINING]
1433
+ DATA_BITEXT_MINING_DA = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING_DA)[["Rank", "Model", "Model Size (Million Parameters)"] + TASK_LIST_BITEXT_MINING_DA]
1434
+ DATA_CLASSIFICATION_DA = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_DA)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLASSIFICATION_DA]
1435
+ DATA_CLASSIFICATION_NB = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_NB)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLASSIFICATION_NB]
1436
+ DATA_CLASSIFICATION_SV = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_SV)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLASSIFICATION_SV]
1437
+ DATA_CLASSIFICATION_OTHER = get_mteb_data(["Classification"], [], TASK_LIST_CLASSIFICATION_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLASSIFICATION_OTHER]
1438
+ DATA_CLUSTERING_DE = get_mteb_data(["Clustering"], [], TASK_LIST_CLUSTERING_DE)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_CLUSTERING_DE]
1439
+ DATA_STS_OTHER = get_mteb_data(["STS"], [], TASK_LIST_STS_OTHER)[["Rank", "Model", "Model Size (Million Parameters)", "Average"] + TASK_LIST_STS_OTHER]
1440
 
1441
  # Exact, add all non-nan integer values for every dataset
1442
  NUM_SCORES = 0
 
1479
  DATA_SUMMARIZATION_FR,
1480
  ]:
1481
  # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum()
1482
+ cols_to_ignore = 4 if "Average" in d.columns else 3
1483
  # Count number of scores including only non-nan floats & excluding the rank column
1484
  NUM_SCORES += d.iloc[:, cols_to_ignore:].notna().sum().sum()
1485
  # Exclude rank & model name column (first two); Do not count different language versions as different datasets
 
1494
  # 1. Force headers to wrap
1495
  # 2. Force model column (maximum) width
1496
  # 3. Prevent model column from overflowing, scroll instead
1497
+ # 4. Prevent checkbox groups from taking up too much space
1498
  css = """
1499
  table > thead {
1500
  white-space: normal
 
1507
  table > tbody > tr > td:nth-child(2) > div {
1508
  overflow-x: auto
1509
  }
1510
+
1511
+ .filter-checkbox-group {
1512
+ max-width: max-content;
1513
+ }
1514
  """
1515
 
1516
  """
 
1830
  }
1831
 
1832
  dataframes = []
1833
+ full_dataframes = []
1834
  tabs = []
1835
 
1836
  # The following JavaScript function updates the URL parameters based on the selected task and language
 
1863
  language_per_task[current_task_language["task"]] = event.target.id
1864
  return current_task_language, language_per_task
1865
 
1866
# Model-size buckets offered in the UI. Bounds are in millions of parameters
# (the unit of the "Model Size (Million Parameters)" column). Every bucket
# excludes its lower bound and includes its upper bound.
NUMERIC_INTERVALS = {
    "<100M": pd.Interval(0, 100, closed="right"),
    ">100M, <500M": pd.Interval(100, 500, closed="right"),
    ">500M, <1B": pd.Interval(500, 1000, closed="right"),
    ">1B": pd.Interval(1000, 1_000_000, closed="right"),
}

MODEL_TYPES = [
    "Open",
    "API",
]

_MODEL_SIZE_COLUMN = "Model Size (Million Parameters)"
_UNKNOWN_SIZE_LABEL = "?"
_MODEL_LINK_PATTERN = re.compile(r"<a .+?>(.+)</a>")


def _display_name(cell):
    """Return the link text of an ``<a ...>name</a>`` cell, or the cell as text."""
    text = str(cell)
    match = _MODEL_LINK_PATTERN.match(text)
    return match.group(1) if match else text


def _compile_query(query):
    """Compile ``query`` as a regex, falling back to a literal match if invalid."""
    try:
        return re.compile(query)
    except re.error:
        return re.compile(re.escape(query))


def _name_mask(names, pattern):
    """Return a boolean Series marking the names in which ``pattern`` is found."""
    # astype(bool) keeps the mask boolean even when ``names`` is empty.
    return names.map(lambda name: pattern.search(name) is not None).astype(bool)


def _any_of(masks, index):
    """OR together boolean masks; with no masks, every row is rejected."""
    combined = pd.Series(False, index=index, dtype=bool)
    for mask in masks:
        combined = combined | mask
    return combined


def _numeric_sizes(df):
    """Return the model-size column as floats; blank/non-numeric cells become NaN."""
    return pd.to_numeric(df[_MODEL_SIZE_COLUMN], errors="coerce")


def _in_interval(sizes, interval):
    """Return a boolean Series marking the sizes that fall inside ``interval``."""
    lower = sizes >= interval.left if interval.closed_left else sizes > interval.left
    upper = sizes <= interval.right if interval.closed_right else sizes < interval.right
    # NaN compares False on both sides, so unknown sizes never match a bucket.
    return lower & upper


def filter_data(search_query, model_types, model_sizes, *full_dataframes):
    """Filter each leaderboard dataframe by name, model type and model size.

    Args:
        search_query: ``;``-separated queries matched against the model name
            (the link text of the "Model" cell). A row is kept if any query
            matches. Queries are regular expressions; an invalid expression
            is matched literally. Blank queries are ignored.
        model_types: Selected entries of ``MODEL_TYPES``. "Open" keeps rows
            with a numeric model size, "API" keeps rows without one.
        model_sizes: Selected size labels: ``"?"`` (unknown size) and/or keys
            of ``NUMERIC_INTERVALS``. The list is not modified.
        *full_dataframes: Unfiltered dataframes, each with "Model" and
            "Model Size (Million Parameters)" columns.

    Returns:
        A list with one filtered dataframe per input dataframe, in order.
    """
    queries = (query.strip() for query in (search_query or "").split(";"))
    patterns = [_compile_query(query) for query in queries if query]

    selected_types = set(model_types or [])
    selected_sizes = set(model_sizes or [])
    # Filtering is skipped when everything is selected, regardless of the
    # order in which the checkboxes were ticked.
    filter_types = selected_types != set(MODEL_TYPES)
    filter_sizes = selected_sizes != {_UNKNOWN_SIZE_LABEL, *NUMERIC_INTERVALS}
    intervals = [
        interval
        for label, interval in NUMERIC_INTERVALS.items()
        if label in selected_sizes
    ]

    output_dataframes = []
    for df in full_dataframes:
        # Apply the search query
        if patterns:
            names = df["Model"].map(_display_name)
            masks = [_name_mask(names, pattern) for pattern in patterns]
            df = df[_any_of(masks, df.index)]

        # Apply the model type filtering
        if filter_types:
            unknown = _numeric_sizes(df).isna()
            masks = []
            if "Open" in selected_types:
                masks.append(~unknown)
            if "API" in selected_types:
                masks.append(unknown)
            df = df[_any_of(masks, df.index)]

        # Apply the model size filtering
        if filter_sizes:
            sizes = _numeric_sizes(df)
            masks = [_in_interval(sizes, interval) for interval in intervals]
            # The "?" choice is evaluated for every dataframe without
            # consuming it from the caller's selection.
            if _UNKNOWN_SIZE_LABEL in selected_sizes:
                masks.append(sizes.isna())
            df = df[_any_of(masks, df.index)]

        output_dataframes.append(df)
    return output_dataframes
1916
+
1917
  with gr.Blocks(css=css) as block:
1918
 
1919
  # Store the current task and language for updating the URL. This is a bit hacky, but it works
 
1925
  Massive Text Embedding Benchmark (MTEB) Leaderboard. To submit, refer to the <a href="https://github.com/embeddings-benchmark/mteb#leaderboard" target="_blank" style="text-decoration: underline">MTEB GitHub repository</a> 🤗 Refer to the [MTEB paper](https://arxiv.org/abs/2210.07316) for details on metrics, tasks and models.
1926
  """)
1927
 
1928
+ with gr.Row():
1929
+ search_bar = gr.Textbox(
1930
+ label="Search Bar",
1931
+ placeholder=" 🔍 Search for your model (separate multiple queries with `;`) and press enter...",
1932
+ )
1933
+ filter_model_type = gr.CheckboxGroup(
1934
+ label="Model types",
1935
+ choices=MODEL_TYPES,
1936
+ value=MODEL_TYPES,
1937
+ interactive=True,
1938
+ elem_classes=["filter-checkbox-group"]
1939
+ )
1940
+ filter_model_sizes = gr.CheckboxGroup(
1941
+ label="Model sizes (in number of parameters)",
1942
+ choices=["?"] + list(NUMERIC_INTERVALS.keys()),
1943
+ value=["?"] + list(NUMERIC_INTERVALS.keys()),
1944
+ interactive=True,
1945
+ elem_classes=["filter-checkbox-group"]
1946
+ )
1947
+
1948
  with gr.Tabs() as outer_tabs:
1949
  # Store the tabs for updating them on load based on URL parameters
1950
  tabs.append(outer_tabs)
 
1981
 
1982
  with gr.Row():
1983
  datatype = ["number", "markdown"] + ["number"] * len(item["data"])
1984
+ dataframe = gr.Dataframe(item["data"], datatype=datatype, type="pandas", height=500)
1985
  dataframes.append(dataframe)
1986
 
1987
+ full_dataframe = gr.Dataframe(item["data"], datatype=datatype, type="pandas", visible=False)
1988
+ full_dataframes.append(full_dataframe)
1989
+
1990
  with gr.Row():
1991
  refresh_button = gr.Button("Refresh")
1992
  refresh_button.click(item["refresh"], inputs=None, outputs=dataframe)
 
2033
 
2034
  block.load(set_tabs_on_load, inputs=[], outputs=tabs + [current_task_language, language_per_task])
2035
 
2036
+ search_bar.submit(filter_data, inputs=[search_bar, filter_model_type, filter_model_sizes] + full_dataframes, outputs=dataframes)
2037
+ filter_model_type.change(filter_data, inputs=[search_bar, filter_model_type, filter_model_sizes] + full_dataframes, outputs=dataframes)
2038
+ filter_model_sizes.change(filter_data, inputs=[search_bar, filter_model_type, filter_model_sizes] + full_dataframes, outputs=dataframes)
2039
+
2040
  block.queue(max_size=10)
2041
  block.launch()
2042