Muennighoff committed on
Commit
eafd5c8
·
1 Parent(s): 7c14747
Files changed (2) hide show
  1. app.py +47 -196
  2. requirements.txt +4 -70
app.py CHANGED
@@ -1,3 +1,4 @@
 
1
  import json
2
 
3
  from datasets import load_dataset
@@ -719,6 +720,10 @@ MODELS_TO_SKIP = {
719
  "michaelfeil/ct2fast-bge-small-en-v1.5",
720
  "rizki/bgr-tf",
721
  "ef-zulla/e5-multi-sml-torch",
 
 
 
 
722
  }
723
 
724
  EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
@@ -1211,12 +1216,8 @@ with block:
1211
  )
1212
  with gr.Row():
1213
  data_run_bitext_mining = gr.Button("Refresh")
1214
- task_bitext_mining = gr.Variable(value=["BitextMining"])
1215
- lang_bitext_mining = gr.Variable(value=[])
1216
- datasets_bitext_mining = gr.Variable(value=TASK_LIST_BITEXT_MINING)
1217
  data_run_bitext_mining.click(
1218
- get_mteb_data,
1219
- inputs=[task_bitext_mining, lang_bitext_mining, datasets_bitext_mining],
1220
  outputs=data_bitext_mining,
1221
  )
1222
  with gr.TabItem("Danish"):
@@ -1236,16 +1237,8 @@ with block:
1236
  )
1237
  with gr.Row():
1238
  data_run_bitext_mining_da = gr.Button("Refresh")
1239
- task_bitext_mining_da = gr.Variable(value=["BitextMining"])
1240
- lang_bitext_mining_da = gr.Variable(value=[])
1241
- datasets_bitext_mining_da = gr.Variable(value=TASK_LIST_BITEXT_MINING_OTHER)
1242
  data_run_bitext_mining_da.click(
1243
- get_mteb_data,
1244
- inputs=[
1245
- task_bitext_mining_da,
1246
- lang_bitext_mining_da,
1247
- datasets_bitext_mining_da,
1248
- ],
1249
  outputs=data_bitext_mining_da,
1250
  )
1251
  with gr.TabItem("Classification"):
@@ -1265,14 +1258,8 @@ with block:
1265
  )
1266
  with gr.Row():
1267
  data_run_classification_en = gr.Button("Refresh")
1268
- task_classification_en = gr.Variable(value=["Classification"])
1269
- lang_classification_en = gr.Variable(value=["en"])
1270
  data_run_classification_en.click(
1271
- get_mteb_data,
1272
- inputs=[
1273
- task_classification_en,
1274
- lang_classification_en,
1275
- ],
1276
  outputs=data_classification_en,
1277
  )
1278
  with gr.TabItem("Chinese"):
@@ -1292,16 +1279,8 @@ with block:
1292
  )
1293
  with gr.Row():
1294
  data_run_classification_zh = gr.Button("Refresh")
1295
- task_classification_zh = gr.Variable(value=["Classification"])
1296
- lang_classification_zh = gr.Variable([])
1297
- datasets_classification_zh = gr.Variable(value=TASK_LIST_CLASSIFICATION_ZH)
1298
  data_run_classification_zh.click(
1299
- get_mteb_data,
1300
- inputs=[
1301
- task_classification_zh,
1302
- lang_classification_zh,
1303
- datasets_classification_zh,
1304
- ],
1305
  outputs=data_classification_zh,
1306
  )
1307
  with gr.TabItem("Danish"):
@@ -1321,17 +1300,9 @@ with block:
1321
  )
1322
  with gr.Row():
1323
  data_run_classification_da = gr.Button("Refresh")
1324
- task_classification_da = gr.Variable(value=["Classification"])
1325
- lang_classification_da = gr.Variable(value=[])
1326
- datasets_classification_da = gr.Variable(value=TASK_LIST_CLASSIFICATION_DA)
1327
  data_run_classification_da.click(
1328
- get_mteb_data,
1329
- inputs=[
1330
- task_classification_da,
1331
- lang_classification_da,
1332
- datasets_classification_da,
1333
- ],
1334
- outputs=data_classification_da,
1335
  )
1336
  with gr.TabItem("Norwegian"):
1337
  with gr.Row():
@@ -1350,18 +1321,10 @@ with block:
1350
  )
1351
  with gr.Row():
1352
  data_run_classification_nb = gr.Button("Refresh")
1353
- task_classification_nb = gr.Variable(value=["Classification"])
1354
- lang_classification_nb = gr.Variable(value=[])
1355
- datasets_classification_nb = gr.Variable(value=TASK_LIST_CLASSIFICATION_NB)
1356
  data_run_classification_nb.click(
1357
- get_mteb_data,
1358
- inputs=[
1359
- task_classification_nb,
1360
- lang_classification_nb,
1361
- datasets_classification_nb,
1362
- ],
1363
  outputs=data_classification_nb,
1364
- )
1365
  with gr.TabItem("Polish"):
1366
  with gr.Row():
1367
  gr.Markdown("""
@@ -1379,18 +1342,10 @@ with block:
1379
  )
1380
  with gr.Row():
1381
  data_run_classification_pl = gr.Button("Refresh")
1382
- task_classification_pl = gr.Variable(value=["Classification"])
1383
- lang_classification_pl = gr.Variable(value=[])
1384
- datasets_classification_pl = gr.Variable(value=TASK_LIST_CLASSIFICATION_PL)
1385
  data_run_classification_pl.click(
1386
- get_mteb_data,
1387
- inputs=[
1388
- task_classification_pl,
1389
- lang_classification_pl,
1390
- datasets_classification_pl,
1391
- ],
1392
  outputs=data_classification_pl,
1393
- )
1394
  with gr.TabItem("Swedish"):
1395
  with gr.Row():
1396
  gr.Markdown("""
@@ -1408,16 +1363,8 @@ with block:
1408
  )
1409
  with gr.Row():
1410
  data_run_classification_sv = gr.Button("Refresh")
1411
- task_classification_sv = gr.Variable(value=["Classification"])
1412
- lang_classification_sv = gr.Variable(value=[])
1413
- datasets_classification_sv = gr.Variable(value=TASK_LIST_CLASSIFICATION_SV)
1414
  data_run_classification_sv.click(
1415
- get_mteb_data,
1416
- inputs=[
1417
- task_classification_sv,
1418
- lang_classification_sv,
1419
- datasets_classification_sv,
1420
- ],
1421
  outputs=data_classification_sv,
1422
  )
1423
  with gr.TabItem("Other"):
@@ -1436,18 +1383,10 @@ with block:
1436
  )
1437
  with gr.Row():
1438
  data_run_classification = gr.Button("Refresh")
1439
- task_classification = gr.Variable(value=["Classification"])
1440
- lang_classification = gr.Variable(value=[])
1441
- datasets_classification = gr.Variable(value=TASK_LIST_CLASSIFICATION_OTHER)
1442
  data_run_classification.click(
1443
- get_mteb_data,
1444
- inputs=[
1445
- task_classification,
1446
- lang_classification,
1447
- datasets_classification,
1448
- ],
1449
  outputs=data_classification,
1450
- )
1451
  with gr.TabItem("Clustering"):
1452
  with gr.TabItem("English"):
1453
  with gr.Row():
@@ -1465,12 +1404,8 @@ with block:
1465
  )
1466
  with gr.Row():
1467
  data_run_clustering_en = gr.Button("Refresh")
1468
- task_clustering = gr.Variable(value=["Clustering"])
1469
- lang_clustering = gr.Variable(value=[])
1470
- datasets_clustering = gr.Variable(value=TASK_LIST_CLUSTERING)
1471
  data_run_clustering_en.click(
1472
- get_mteb_data,
1473
- inputs=[task_clustering, lang_clustering, datasets_clustering],
1474
  outputs=data_clustering,
1475
  )
1476
  with gr.TabItem("Chinese"):
@@ -1490,12 +1425,8 @@ with block:
1490
  )
1491
  with gr.Row():
1492
  data_run_clustering_zh = gr.Button("Refresh")
1493
- task_clustering_zh = gr.Variable(value=["Clustering"])
1494
- lang_clustering_zh = gr.Variable(value=[])
1495
- datasets_clustering_zh = gr.Variable(value=TASK_LIST_CLUSTERING_ZH)
1496
  data_run_clustering_zh.click(
1497
- get_mteb_data,
1498
- inputs=[task_clustering_zh, lang_clustering_zh, datasets_clustering_zh],
1499
  outputs=data_clustering_zh,
1500
  )
1501
  with gr.TabItem("German"):
@@ -1515,12 +1446,8 @@ with block:
1515
  )
1516
  with gr.Row():
1517
  data_run_clustering_de = gr.Button("Refresh")
1518
- task_clustering_de = gr.Variable(value=["Clustering"])
1519
- lang_clustering_de = gr.Variable(value=[])
1520
- datasets_clustering_de = gr.Variable(value=TASK_LIST_CLUSTERING_DE)
1521
  data_run_clustering_de.click(
1522
- get_mteb_data,
1523
- inputs=[task_clustering_de, lang_clustering_de, datasets_clustering_de],
1524
  outputs=data_clustering_de,
1525
  )
1526
  with gr.TabItem("Polish"):
@@ -1540,12 +1467,8 @@ with block:
1540
  )
1541
  with gr.Row():
1542
  data_run_clustering_pl = gr.Button("Refresh")
1543
- task_clustering_pl = gr.Variable(value=["Clustering"])
1544
- lang_clustering_pl = gr.Variable(value=[])
1545
- datasets_clustering_pl = gr.Variable(value=TASK_LIST_CLUSTERING_PL)
1546
  data_run_clustering_pl.click(
1547
- get_mteb_data,
1548
- inputs=[task_clustering_pl, lang_clustering_pl, datasets_clustering_pl],
1549
  outputs=data_clustering_pl,
1550
  )
1551
  with gr.TabItem("Pair Classification"):
@@ -1565,16 +1488,8 @@ with block:
1565
  )
1566
  with gr.Row():
1567
  data_run_pair_classification = gr.Button("Refresh")
1568
- task_pair_classification = gr.Variable(value=["PairClassification"])
1569
- lang_pair_classification = gr.Variable(value=[])
1570
- datasets_pair_classification = gr.Variable(value=TASK_LIST_PAIR_CLASSIFICATION)
1571
  data_run_pair_classification.click(
1572
- get_mteb_data,
1573
- inputs=[
1574
- task_pair_classification,
1575
- lang_pair_classification,
1576
- datasets_pair_classification,
1577
- ],
1578
  outputs=data_pair_classification,
1579
  )
1580
  with gr.TabItem("Chinese"):
@@ -1593,23 +1508,15 @@ with block:
1593
  type="pandas",
1594
  )
1595
  with gr.Row():
1596
- data_run = gr.Button("Refresh")
1597
- task_pair_classification_zh = gr.Variable(value=["PairClassification"])
1598
- lang_pair_classification_zh = gr.Variable(value=[])
1599
- datasets_pair_classification_zh = gr.Variable(value=TASK_LIST_PAIR_CLASSIFICATION_ZH)
1600
- data_run_classification_zh.click(
1601
- get_mteb_data,
1602
- inputs=[
1603
- task_pair_classification_zh,
1604
- lang_pair_classification_zh,
1605
- datasets_pair_classification_zh,
1606
- ],
1607
  outputs=data_pair_classification_zh,
1608
  )
1609
  with gr.TabItem("Polish"):
1610
  with gr.Row():
1611
  gr.Markdown("""
1612
- **Pair Classification Chinese Leaderboard 🎭🇵🇱**
1613
 
1614
  - **Metric:** Average Precision based on Cosine Similarities (cos_sim_ap)
1615
  - **Languages:** Polish
@@ -1622,19 +1529,11 @@ with block:
1622
  type="pandas",
1623
  )
1624
  with gr.Row():
1625
- data_run = gr.Button("Refresh")
1626
- task_pair_classification_pl = gr.Variable(value=["PairClassification"])
1627
- lang_pair_classification_pl = gr.Variable(value=[])
1628
- datasets_pair_classification_pl = gr.Variable(value=TASK_LIST_PAIR_CLASSIFICATION_PL)
1629
- data_run_classification_pl.click(
1630
- get_mteb_data,
1631
- inputs=[
1632
- task_pair_classification_pl,
1633
- lang_pair_classification_pl,
1634
- datasets_pair_classification_pl,
1635
- ],
1636
  outputs=data_pair_classification_pl,
1637
- )
1638
  with gr.TabItem("Reranking"):
1639
  with gr.TabItem("English"):
1640
  with gr.Row():
@@ -1652,17 +1551,9 @@ with block:
1652
  )
1653
  with gr.Row():
1654
  data_run_reranking = gr.Button("Refresh")
1655
- task_reranking = gr.Variable(value=["Reranking"])
1656
- lang_reranking = gr.Variable(value=[])
1657
- datasets_reranking = gr.Variable(value=TASK_LIST_RERANKING)
1658
  data_run_reranking.click(
1659
- get_mteb_data,
1660
- inputs=[
1661
- task_reranking,
1662
- lang_reranking,
1663
- datasets_reranking,
1664
- ],
1665
- outputs=data_reranking
1666
  )
1667
  with gr.TabItem("Chinese"):
1668
  with gr.Row():
@@ -1681,12 +1572,8 @@ with block:
1681
  )
1682
  with gr.Row():
1683
  data_run_reranking_zh = gr.Button("Refresh")
1684
- task_reranking_zh = gr.Variable(value=["Reranking"])
1685
- lang_reranking_zh = gr.Variable(value=[])
1686
- datasets_reranking_zh = gr.Variable(value=TASK_LIST_RERANKING_ZH)
1687
  data_run_reranking_zh.click(
1688
- get_mteb_data,
1689
- inputs=[task_reranking_zh, lang_reranking_zh, datasets_reranking_zh],
1690
  outputs=data_reranking_zh,
1691
  )
1692
  with gr.TabItem("Retrieval"):
@@ -1707,17 +1594,9 @@ with block:
1707
  )
1708
  with gr.Row():
1709
  data_run_retrieval = gr.Button("Refresh")
1710
- task_retrieval = gr.Variable(value=["Retrieval"])
1711
- lang_retrieval = gr.Variable(value=[])
1712
- datasets_retrieval = gr.Variable(value=TASK_LIST_RETRIEVAL)
1713
  data_run_retrieval.click(
1714
- get_mteb_data,
1715
- inputs=[
1716
- task_retrieval,
1717
- lang_retrieval,
1718
- datasets_retrieval,
1719
- ],
1720
- outputs=data_retrieval
1721
  )
1722
  with gr.TabItem("Chinese"):
1723
  with gr.Row():
@@ -1737,12 +1616,8 @@ with block:
1737
  )
1738
  with gr.Row():
1739
  data_run_retrieval_zh = gr.Button("Refresh")
1740
- task_retrieval_zh = gr.Variable(value=["Retrieval"])
1741
- lang_retrieval_zh = gr.Variable(value=[])
1742
- datasets_retrieval_zh = gr.Variable(value=TASK_LIST_RETRIEVAL_ZH)
1743
  data_run_retrieval_zh.click(
1744
- get_mteb_data,
1745
- inputs=[task_retrieval_zh, lang_retrieval_zh, datasets_retrieval_zh],
1746
  outputs=data_retrieval_zh,
1747
  )
1748
  with gr.TabItem("Polish"):
@@ -1763,13 +1638,9 @@ with block:
1763
  )
1764
  with gr.Row():
1765
  data_run_retrieval_pl = gr.Button("Refresh")
1766
- task_retrieval_pl = gr.Variable(value=["Retrieval"])
1767
- lang_retrieval_pl = gr.Variable(value=[])
1768
- datasets_retrieval_pl = gr.Variable(value=TASK_LIST_RETRIEVAL_PL)
1769
  data_run_retrieval_pl.click(
1770
- get_mteb_data,
1771
- inputs=[task_retrieval_pl, lang_retrieval_pl, datasets_retrieval_pl],
1772
- outputs=data_retrieval_pl
1773
  )
1774
  with gr.TabItem("STS"):
1775
  with gr.TabItem("English"):
@@ -1788,12 +1659,8 @@ with block:
1788
  )
1789
  with gr.Row():
1790
  data_run_sts_en = gr.Button("Refresh")
1791
- task_sts_en = gr.Variable(value=["STS"])
1792
- lang_sts_en = gr.Variable(value=[])
1793
- datasets_sts_en = gr.Variable(value=TASK_LIST_STS)
1794
  data_run_sts_en.click(
1795
- get_mteb_data,
1796
- inputs=[task_sts_en, lang_sts_en, datasets_sts_en],
1797
  outputs=data_sts_en,
1798
  )
1799
  with gr.TabItem("Chinese"):
@@ -1813,12 +1680,8 @@ with block:
1813
  )
1814
  with gr.Row():
1815
  data_run_sts_zh = gr.Button("Refresh")
1816
- task_sts_zh = gr.Variable(value=["STS"])
1817
- lang_sts_zh = gr.Variable(value=[])
1818
- datasets_sts_zh = gr.Variable(value=TASK_LIST_STS_ZH)
1819
  data_run_sts_zh.click(
1820
- get_mteb_data,
1821
- inputs=[task_sts_zh, lang_sts_zh, datasets_sts_zh],
1822
  outputs=data_sts_zh,
1823
  )
1824
  with gr.TabItem("Polish"):
@@ -1838,14 +1701,10 @@ with block:
1838
  )
1839
  with gr.Row():
1840
  data_run_sts_pl = gr.Button("Refresh")
1841
- task_sts_pl = gr.Variable(value=["STS"])
1842
- lang_sts_pl = gr.Variable(value=[])
1843
- datasets_sts_pl = gr.Variable(value=TASK_LIST_STS_PL)
1844
  data_run_sts_pl.click(
1845
- get_mteb_data,
1846
- inputs=[task_sts_pl, lang_sts_pl, datasets_sts_pl],
1847
  outputs=data_sts_pl,
1848
- )
1849
  with gr.TabItem("Other"):
1850
  with gr.Row():
1851
  gr.Markdown("""
@@ -1862,13 +1721,9 @@ with block:
1862
  )
1863
  with gr.Row():
1864
  data_run_sts_other = gr.Button("Refresh")
1865
- task_sts_other = gr.Variable(value=["STS"])
1866
- lang_sts_other = gr.Variable(value=[])
1867
- datasets_sts_other = gr.Variable(value=TASK_LIST_STS_OTHER)
1868
  data_run_sts_other.click(
1869
- get_mteb_data,
1870
- inputs=[task_sts_other, lang_sts_other, task_sts_other, datasets_sts_other],
1871
- outputs=data_sts_other
1872
  )
1873
  with gr.TabItem("Summarization"):
1874
  with gr.Row():
@@ -1886,10 +1741,8 @@ with block:
1886
  )
1887
  with gr.Row():
1888
  data_run = gr.Button("Refresh")
1889
- task_summarization = gr.Variable(value=["Summarization"])
1890
  data_run.click(
1891
- get_mteb_data,
1892
- inputs=[task_summarization],
1893
  outputs=data_summarization,
1894
  )
1895
  gr.Markdown(r"""
@@ -1914,13 +1767,11 @@ with block:
1914
  block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
1915
  """
1916
 
1917
- block.queue(concurrency_count=40, max_size=10)
1918
  block.launch()
1919
 
1920
 
1921
  # Possible changes:
1922
- # Could check if tasks are valid (Currently users could just invent new tasks - similar for languages)
1923
- # Could make it load in the background without the Gradio logo closer to the Deep RL space
1924
  # Could add graphs / other visual content
1925
  # Could add verification marks
1926
 
 
1
+ from functools import partial
2
  import json
3
 
4
  from datasets import load_dataset
 
720
  "michaelfeil/ct2fast-bge-small-en-v1.5",
721
  "rizki/bgr-tf",
722
  "ef-zulla/e5-multi-sml-torch",
723
+ "cherubhao/yogamodel",
724
+ "morgendigital/multilingual-e5-large-quantized",
725
+ "jncraton/gte-tiny-ct2-int8",
726
+ "Research2NLP/electrical_stella",
727
  }
728
 
729
  EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
 
1216
  )
1217
  with gr.Row():
1218
  data_run_bitext_mining = gr.Button("Refresh")
 
 
 
1219
  data_run_bitext_mining.click(
1220
+ partial(get_mteb_data, tasks=["BitextMining"], datasets=TASK_LIST_BITEXT_MINING),
 
1221
  outputs=data_bitext_mining,
1222
  )
1223
  with gr.TabItem("Danish"):
 
1237
  )
1238
  with gr.Row():
1239
  data_run_bitext_mining_da = gr.Button("Refresh")
 
 
 
1240
  data_run_bitext_mining_da.click(
1241
+ partial(get_mteb_data, tasks=["BitextMining"], datasets=TASK_LIST_BITEXT_MINING_OTHER),
 
 
 
 
 
1242
  outputs=data_bitext_mining_da,
1243
  )
1244
  with gr.TabItem("Classification"):
 
1258
  )
1259
  with gr.Row():
1260
  data_run_classification_en = gr.Button("Refresh")
 
 
1261
  data_run_classification_en.click(
1262
+ partial(get_mteb_data, tasks=["Classification"], langs=["en"]),
 
 
 
 
1263
  outputs=data_classification_en,
1264
  )
1265
  with gr.TabItem("Chinese"):
 
1279
  )
1280
  with gr.Row():
1281
  data_run_classification_zh = gr.Button("Refresh")
 
 
 
1282
  data_run_classification_zh.click(
1283
+ partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_ZH),
 
 
 
 
 
1284
  outputs=data_classification_zh,
1285
  )
1286
  with gr.TabItem("Danish"):
 
1300
  )
1301
  with gr.Row():
1302
  data_run_classification_da = gr.Button("Refresh")
 
 
 
1303
  data_run_classification_da.click(
1304
+ partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_DA),
1305
+ outputs=data_classification_da,
 
 
 
 
 
1306
  )
1307
  with gr.TabItem("Norwegian"):
1308
  with gr.Row():
 
1321
  )
1322
  with gr.Row():
1323
  data_run_classification_nb = gr.Button("Refresh")
 
 
 
1324
  data_run_classification_nb.click(
1325
+ partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_NB),
 
 
 
 
 
1326
  outputs=data_classification_nb,
1327
+ )
1328
  with gr.TabItem("Polish"):
1329
  with gr.Row():
1330
  gr.Markdown("""
 
1342
  )
1343
  with gr.Row():
1344
  data_run_classification_pl = gr.Button("Refresh")
 
 
 
1345
  data_run_classification_pl.click(
1346
+ partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_PL),
 
 
 
 
 
1347
  outputs=data_classification_pl,
1348
+ )
1349
  with gr.TabItem("Swedish"):
1350
  with gr.Row():
1351
  gr.Markdown("""
 
1363
  )
1364
  with gr.Row():
1365
  data_run_classification_sv = gr.Button("Refresh")
 
 
 
1366
  data_run_classification_sv.click(
1367
+ partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_SV),
 
 
 
 
 
1368
  outputs=data_classification_sv,
1369
  )
1370
  with gr.TabItem("Other"):
 
1383
  )
1384
  with gr.Row():
1385
  data_run_classification = gr.Button("Refresh")
 
 
 
1386
  data_run_classification.click(
1387
+ partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_OTHER),
 
 
 
 
 
1388
  outputs=data_classification,
1389
+ )
1390
  with gr.TabItem("Clustering"):
1391
  with gr.TabItem("English"):
1392
  with gr.Row():
 
1404
  )
1405
  with gr.Row():
1406
  data_run_clustering_en = gr.Button("Refresh")
 
 
 
1407
  data_run_clustering_en.click(
1408
+ partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING),
 
1409
  outputs=data_clustering,
1410
  )
1411
  with gr.TabItem("Chinese"):
 
1425
  )
1426
  with gr.Row():
1427
  data_run_clustering_zh = gr.Button("Refresh")
 
 
 
1428
  data_run_clustering_zh.click(
1429
+ partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_ZH),
 
1430
  outputs=data_clustering_zh,
1431
  )
1432
  with gr.TabItem("German"):
 
1446
  )
1447
  with gr.Row():
1448
  data_run_clustering_de = gr.Button("Refresh")
 
 
 
1449
  data_run_clustering_de.click(
1450
+ partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_DE),
 
1451
  outputs=data_clustering_de,
1452
  )
1453
  with gr.TabItem("Polish"):
 
1467
  )
1468
  with gr.Row():
1469
  data_run_clustering_pl = gr.Button("Refresh")
 
 
 
1470
  data_run_clustering_pl.click(
1471
+ partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_PL),
 
1472
  outputs=data_clustering_pl,
1473
  )
1474
  with gr.TabItem("Pair Classification"):
 
1488
  )
1489
  with gr.Row():
1490
  data_run_pair_classification = gr.Button("Refresh")
 
 
 
1491
  data_run_pair_classification.click(
1492
+ partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION),
 
 
 
 
 
1493
  outputs=data_pair_classification,
1494
  )
1495
  with gr.TabItem("Chinese"):
 
1508
  type="pandas",
1509
  )
1510
  with gr.Row():
1511
+ data_run_pair_classification_zh = gr.Button("Refresh")
1512
+ data_run_pair_classification_zh.click(
1513
+ partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_ZH),
 
 
 
 
 
 
 
 
1514
  outputs=data_pair_classification_zh,
1515
  )
1516
  with gr.TabItem("Polish"):
1517
  with gr.Row():
1518
  gr.Markdown("""
1519
+ **Pair Classification Polish Leaderboard 🎭🇵🇱**
1520
 
1521
  - **Metric:** Average Precision based on Cosine Similarities (cos_sim_ap)
1522
  - **Languages:** Polish
 
1529
  type="pandas",
1530
  )
1531
  with gr.Row():
1532
+ data_run_pair_classification_pl = gr.Button("Refresh")
1533
+ data_run_pair_classification_pl.click(
1534
+ partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_PL),
 
 
 
 
 
 
 
 
1535
  outputs=data_pair_classification_pl,
1536
+ )
1537
  with gr.TabItem("Reranking"):
1538
  with gr.TabItem("English"):
1539
  with gr.Row():
 
1551
  )
1552
  with gr.Row():
1553
  data_run_reranking = gr.Button("Refresh")
 
 
 
1554
  data_run_reranking.click(
1555
+ partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING),
1556
+ outputs=data_reranking,
 
 
 
 
 
1557
  )
1558
  with gr.TabItem("Chinese"):
1559
  with gr.Row():
 
1572
  )
1573
  with gr.Row():
1574
  data_run_reranking_zh = gr.Button("Refresh")
 
 
 
1575
  data_run_reranking_zh.click(
1576
+ partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_ZH),
 
1577
  outputs=data_reranking_zh,
1578
  )
1579
  with gr.TabItem("Retrieval"):
 
1594
  )
1595
  with gr.Row():
1596
  data_run_retrieval = gr.Button("Refresh")
 
 
 
1597
  data_run_retrieval.click(
1598
+ partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL),
1599
+ outputs=data_retrieval,
 
 
 
 
 
1600
  )
1601
  with gr.TabItem("Chinese"):
1602
  with gr.Row():
 
1616
  )
1617
  with gr.Row():
1618
  data_run_retrieval_zh = gr.Button("Refresh")
 
 
 
1619
  data_run_retrieval_zh.click(
1620
+ partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_ZH),
 
1621
  outputs=data_retrieval_zh,
1622
  )
1623
  with gr.TabItem("Polish"):
 
1638
  )
1639
  with gr.Row():
1640
  data_run_retrieval_pl = gr.Button("Refresh")
 
 
 
1641
  data_run_retrieval_pl.click(
1642
+ partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_PL),
1643
+ outputs=data_retrieval_pl,
 
1644
  )
1645
  with gr.TabItem("STS"):
1646
  with gr.TabItem("English"):
 
1659
  )
1660
  with gr.Row():
1661
  data_run_sts_en = gr.Button("Refresh")
 
 
 
1662
  data_run_sts_en.click(
1663
+ partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS),
 
1664
  outputs=data_sts_en,
1665
  )
1666
  with gr.TabItem("Chinese"):
 
1680
  )
1681
  with gr.Row():
1682
  data_run_sts_zh = gr.Button("Refresh")
 
 
 
1683
  data_run_sts_zh.click(
1684
+ partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_ZH),
 
1685
  outputs=data_sts_zh,
1686
  )
1687
  with gr.TabItem("Polish"):
 
1701
  )
1702
  with gr.Row():
1703
  data_run_sts_pl = gr.Button("Refresh")
 
 
 
1704
  data_run_sts_pl.click(
1705
+ partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_PL),
 
1706
  outputs=data_sts_pl,
1707
+ )
1708
  with gr.TabItem("Other"):
1709
  with gr.Row():
1710
  gr.Markdown("""
 
1721
  )
1722
  with gr.Row():
1723
  data_run_sts_other = gr.Button("Refresh")
 
 
 
1724
  data_run_sts_other.click(
1725
+ partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_OTHER),
1726
+ outputs=data_sts_other,
 
1727
  )
1728
  with gr.TabItem("Summarization"):
1729
  with gr.Row():
 
1741
  )
1742
  with gr.Row():
1743
  data_run = gr.Button("Refresh")
 
1744
  data_run.click(
1745
+ partial(get_mteb_data, tasks=["Summarization"]),
 
1746
  outputs=data_summarization,
1747
  )
1748
  gr.Markdown(r"""
 
1767
  block.load(get_mteb_data, inputs=[task_bitext_mining], outputs=data_bitext_mining)
1768
  """
1769
 
1770
+ block.queue(max_size=10)
1771
  block.launch()
1772
 
1773
 
1774
  # Possible changes:
 
 
1775
  # Could add graphs / other visual content
1776
  # Could add verification marks
1777
 
requirements.txt CHANGED
@@ -1,70 +1,4 @@
1
- aiofiles==23.1.0
2
- aiohttp==3.8.4
3
- aiosignal==1.3.1
4
- altair==4.2.2
5
- anyio==3.6.2
6
- APScheduler==3.10.1
7
- async-timeout==4.0.2
8
- attrs==23.1.0
9
- certifi==2022.12.7
10
- charset-normalizer==3.1.0
11
- click==8.1.3
12
- contourpy==1.0.7
13
- cycler==0.11.0
14
- datasets==2.12.0
15
- entrypoints==0.4
16
- fastapi==0.95.1
17
- ffmpy==0.3.0
18
- filelock==3.11.0
19
- fonttools==4.39.3
20
- frozenlist==1.3.3
21
- fsspec==2023.4.0
22
- gradio==3.43.2
23
- gradio-client==0.5.0
24
- h11==0.14.0
25
- httpcore==0.17.0
26
- httpx==0.24.0
27
- huggingface-hub==0.16.4
28
- idna==3.4
29
- Jinja2==3.1.2
30
- jsonschema==4.17.3
31
- kiwisolver==1.4.4
32
- linkify-it-py==2.0.0
33
- markdown-it-py==2.2.0
34
- MarkupSafe==2.1.2
35
- matplotlib==3.7.1
36
- mdit-py-plugins==0.3.3
37
- mdurl==0.1.2
38
- multidict==6.0.4
39
- numpy==1.24.2
40
- orjson==3.8.10
41
- packaging==23.1
42
- pandas==2.0.0
43
- Pillow==9.5.0
44
- plotly==5.14.1
45
- pyarrow==11.0.0
46
- pydantic==1.10.7
47
- pydub==0.25.1
48
- pyparsing==3.0.9
49
- pyrsistent==0.19.3
50
- python-dateutil==2.8.2
51
- python-multipart==0.0.6
52
- pytz==2023.3
53
- pytz-deprecation-shim==0.1.0.post0
54
- PyYAML==6.0
55
- requests==2.28.2
56
- semantic-version==2.10.0
57
- six==1.16.0
58
- sniffio==1.3.0
59
- starlette==0.26.1
60
- toolz==0.12.0
61
- tqdm==4.65.0
62
- transformers==4.33.1
63
- typing_extensions==4.5.0
64
- tzdata==2023.3
65
- tzlocal==4.3
66
- uc-micro-py==1.0.1
67
- urllib3==1.26.15
68
- uvicorn==0.21.1
69
- websockets==11.0.1
70
- yarl==1.8.2
 
1
+ gradio
2
+ datasets
3
+ pandas
4
+ huggingface_hub