Muennighoff committed on
Commit
cd84165
·
verified ·
1 Parent(s): 51e109c

French MTEB

Browse files
Files changed (2) hide show
  1. EXTERNAL_MODEL_RESULTS.json +0 -0
  2. app.py +467 -61
EXTERNAL_MODEL_RESULTS.json CHANGED
The diff for this file is too large to render. See raw diff
 
app.py CHANGED
@@ -38,8 +38,6 @@ TASK_LIST_CLASSIFICATION = [
38
  "TweetSentimentExtractionClassification",
39
  ]
40
 
41
- TASK_LIST_CLASSIFICATION_NORM = [x.replace(" (en)", "") for x in TASK_LIST_CLASSIFICATION]
42
-
43
  TASK_LIST_CLASSIFICATION_DA = [
44
  "AngryTweetsClassification",
45
  "DanishPoliticalCommentsClassification",
@@ -51,6 +49,15 @@ TASK_LIST_CLASSIFICATION_DA = [
51
  "ScalaDaClassification",
52
  ]
53
 
 
 
 
 
 
 
 
 
 
54
  TASK_LIST_CLASSIFICATION_NB = [
55
  "NoRecClassification",
56
  "NordicLangClassification",
@@ -115,6 +122,16 @@ TASK_LIST_CLUSTERING_DE = [
115
  "TenKGnadClusteringS2S",
116
  ]
117
 
 
 
 
 
 
 
 
 
 
 
118
  TASK_LIST_CLUSTERING_PL = [
119
  "8TagsClustering",
120
  ]
@@ -132,6 +149,11 @@ TASK_LIST_PAIR_CLASSIFICATION = [
132
  "TwitterURLCorpus",
133
  ]
134
 
 
 
 
 
 
135
  TASK_LIST_PAIR_CLASSIFICATION_PL = [
136
  "CDSC-E",
137
  "PPC",
@@ -151,6 +173,11 @@ TASK_LIST_RERANKING = [
151
  "StackOverflowDupQuestions",
152
  ]
153
 
 
 
 
 
 
154
  TASK_LIST_RERANKING_ZH = [
155
  "CMedQAv1",
156
  "CMedQAv2",
@@ -176,6 +203,15 @@ TASK_LIST_RETRIEVAL = [
176
  "TRECCOVID",
177
  ]
178
 
 
 
 
 
 
 
 
 
 
179
  TASK_LIST_RETRIEVAL_PL = [
180
  "ArguAna-PL",
181
  "DBPedia-PL",
@@ -229,6 +265,12 @@ TASK_LIST_STS = [
229
  "STSBenchmark",
230
  ]
231
 
 
 
 
 
 
 
232
  TASK_LIST_STS_PL = [
233
  "CDSC-R",
234
  "SICK-R-PL",
@@ -247,11 +289,13 @@ TASK_LIST_STS_ZH = [
247
  ]
248
 
249
  TASK_LIST_STS_OTHER = ["STS17 (ar-ar)", "STS17 (en-ar)", "STS17 (en-de)", "STS17 (en-tr)", "STS17 (es-en)", "STS17 (es-es)", "STS17 (fr-en)", "STS17 (it-en)", "STS17 (ko-ko)", "STS17 (nl-en)", "STS22 (ar)", "STS22 (de)", "STS22 (de-en)", "STS22 (de-fr)", "STS22 (de-pl)", "STS22 (es)", "STS22 (es-en)", "STS22 (es-it)", "STS22 (fr)", "STS22 (fr-pl)", "STS22 (it)", "STS22 (pl)", "STS22 (pl-en)", "STS22 (ru)", "STS22 (tr)", "STS22 (zh-en)", "STSBenchmark",]
250
- TASK_LIST_STS_NORM = [x.replace(" (en)", "").replace(" (en-en)", "") for x in TASK_LIST_STS]
251
 
252
  TASK_LIST_SUMMARIZATION = ["SummEval",]
253
 
 
 
254
  TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
 
255
  TASK_LIST_PL = TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL
256
  TASK_LIST_ZH = TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH
257
 
@@ -276,11 +320,22 @@ def make_clickable_model(model_name, link=None):
276
 
277
  # Models without metadata, thus we cannot fetch their results naturally
278
  EXTERNAL_MODELS = [
 
 
 
 
 
 
 
279
  "all-MiniLM-L12-v2",
280
  "all-MiniLM-L6-v2",
281
  "all-mpnet-base-v2",
282
  "allenai-specter",
283
- "Baichuan-text-embedding",
 
 
 
 
284
  "bert-base-swedish-cased",
285
  "bert-base-uncased",
286
  "bge-base-zh-v1.5",
@@ -292,18 +347,21 @@ EXTERNAL_MODELS = [
292
  "dfm-encoder-large-v1",
293
  "dfm-sentence-encoder-large-1",
294
  "distiluse-base-multilingual-cased-v2",
295
- "DanskBERT",
296
  "e5-base",
297
  "e5-large",
298
- "e5-small",
 
299
  "electra-small-nordic",
300
  "electra-small-swedish-cased-discriminator",
 
 
 
301
  "gbert-base",
302
  "gbert-large",
303
  "gelectra-base",
304
  "gelectra-large",
305
- "gottbert-base",
306
  "glove.6B.300d",
 
307
  "gtr-t5-base",
308
  "gtr-t5-large",
309
  "gtr-t5-xl",
@@ -311,11 +369,11 @@ EXTERNAL_MODELS = [
311
  "herbert-base-retrieval-v2",
312
  "komninos",
313
  "luotuo-bert-medium",
314
- "LASER2",
315
- "LaBSE",
316
  "m3e-base",
317
- "m3e-large",
 
318
  "msmarco-bert-co-condensor",
 
319
  "multilingual-e5-base",
320
  "multilingual-e5-large",
321
  "multilingual-e5-small",
@@ -330,14 +388,19 @@ EXTERNAL_MODELS = [
330
  "paraphrase-multilingual-MiniLM-L12-v2",
331
  "paraphrase-multilingual-mpnet-base-v2",
332
  "sentence-bert-swedish-cased",
 
 
 
333
  "sentence-t5-base",
334
  "sentence-t5-large",
335
  "sentence-t5-xl",
336
  "sentence-t5-xxl",
 
337
  "sup-simcse-bert-base-uncased",
338
  "st-polish-paraphrase-from-distilroberta",
339
- "st-polish-paraphrase-from-mpnet",
340
  "text2vec-base-chinese",
 
341
  "text2vec-large-chinese",
342
  "text-embedding-3-small",
343
  "text-embedding-3-large",
@@ -353,38 +416,62 @@ EXTERNAL_MODELS = [
353
  "text-search-curie-001",
354
  "text-search-davinci-001",
355
  "titan-embed-text-v1",
 
 
 
 
356
  "unsup-simcse-bert-base-uncased",
357
  "use-cmlm-multilingual",
 
 
358
  "voyage-lite-01-instruct",
359
- "voyage-lite-02-instruct",
360
  "xlm-roberta-base",
361
- "xlm-roberta-large",
362
  ]
363
 
364
  EXTERNAL_MODEL_TO_LINK = {
 
 
365
  "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
366
  "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
367
  "all-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2",
368
  "all-MiniLM-L6-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
369
  "all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
370
  "Baichuan-text-embedding": "https://platform.baichuan-ai.com/docs/text-Embedding",
 
 
 
 
 
371
  "bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
372
  "bert-base-uncased": "https://huggingface.co/bert-base-uncased",
373
  "bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5",
374
  "bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
375
  "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
376
  "bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
 
 
377
  "contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
378
  "cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
379
  "DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
 
 
 
 
 
380
  "distiluse-base-multilingual-cased-v2": "https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2",
381
  "dfm-encoder-large-v1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
382
  "dfm-sentence-encoder-large-1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
383
  "e5-base": "https://huggingface.co/intfloat/e5-base",
384
  "e5-large": "https://huggingface.co/intfloat/e5-large",
 
385
  "e5-small": "https://huggingface.co/intfloat/e5-small",
386
  "electra-small-nordic": "https://huggingface.co/jonfd/electra-small-nordic",
387
  "electra-small-swedish-cased-discriminator": "https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator",
 
 
 
388
  "gbert-base": "https://huggingface.co/deepset/gbert-base",
389
  "gbert-large": "https://huggingface.co/deepset/gbert-large",
390
  "gelectra-base": "https://huggingface.co/deepset/gelectra-base",
@@ -402,7 +489,9 @@ EXTERNAL_MODEL_TO_LINK = {
402
  "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
403
  "m3e-base": "https://huggingface.co/moka-ai/m3e-base",
404
  "m3e-large": "https://huggingface.co/moka-ai/m3e-large",
 
405
  "msmarco-bert-co-condensor": "https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor",
 
406
  "multilingual-e5-base": "https://huggingface.co/intfloat/multilingual-e5-base",
407
  "multilingual-e5-large": "https://huggingface.co/intfloat/multilingual-e5-large",
408
  "multilingual-e5-small": "https://huggingface.co/intfloat/multilingual-e5-small",
@@ -414,13 +503,18 @@ EXTERNAL_MODEL_TO_LINK = {
414
  "nomic-embed-text-v1.5-512": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
415
  "norbert3-base": "https://huggingface.co/ltg/norbert3-base",
416
  "norbert3-large": "https://huggingface.co/ltg/norbert3-large",
 
417
  "paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
418
  "paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
 
 
 
419
  "sentence-bert-swedish-cased": "https://huggingface.co/KBLab/sentence-bert-swedish-cased",
420
  "sentence-t5-base": "https://huggingface.co/sentence-transformers/sentence-t5-base",
421
  "sentence-t5-large": "https://huggingface.co/sentence-transformers/sentence-t5-large",
422
  "sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
423
  "sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
 
424
  "sup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased",
425
  "st-polish-paraphrase-from-distilroberta": "https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta",
426
  "st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet",
@@ -441,8 +535,14 @@ EXTERNAL_MODEL_TO_LINK = {
441
  "text-search-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
442
  "text-search-davinci-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
443
  "titan-embed-text-v1": "https://docs.aws.amazon.com/bedrock/latest/userguide/embeddings.html",
 
 
 
 
444
  "unsup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/unsup-simcse-bert-base-uncased",
445
  "use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
 
 
446
  "voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
447
  "voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
448
  "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
@@ -450,28 +550,46 @@ EXTERNAL_MODEL_TO_LINK = {
450
  }
451
 
452
  EXTERNAL_MODEL_TO_DIM = {
 
 
453
  "all-MiniLM-L12-v2": 384,
454
  "all-MiniLM-L6-v2": 384,
455
  "all-mpnet-base-v2": 768,
456
  "allenai-specter": 768,
457
  "Baichuan-text-embedding": 1024,
 
 
 
 
 
458
  "bert-base-swedish-cased": 768,
459
  "bert-base-uncased": 768,
460
  "bge-base-zh-v1.5": 768,
461
  "bge-large-zh-v1.5": 1024,
462
  "bge-large-zh-noinstruct": 1024,
463
  "bge-small-zh-v1.5": 512,
 
 
464
  "contriever-base-msmarco": 768,
465
  "cross-en-de-roberta-sentence-transformer": 768,
466
  "DanskBERT": 768,
 
 
 
 
 
467
  "distiluse-base-multilingual-cased-v2": 512,
468
  "dfm-encoder-large-v1": 1024,
469
  "dfm-sentence-encoder-large-1": 1024,
470
  "e5-base": 768,
 
 
471
  "e5-small": 384,
472
- "e5-large": 1024,
473
  "electra-small-nordic": 256,
474
  "electra-small-swedish-cased-discriminator": 256,
 
 
 
475
  "luotuo-bert-medium": 768,
476
  "LASER2": 1024,
477
  "LaBSE": 768,
@@ -489,7 +607,9 @@ EXTERNAL_MODEL_TO_DIM = {
489
  "komninos": 300,
490
  "m3e-base": 768,
491
  "m3e-large": 768,
 
492
  "msmarco-bert-co-condensor": 768,
 
493
  "multilingual-e5-base": 768,
494
  "multilingual-e5-small": 384,
495
  "multilingual-e5-large": 1024,
@@ -501,13 +621,18 @@ EXTERNAL_MODEL_TO_DIM = {
501
  "nomic-embed-text-v1.5-512": 512,
502
  "norbert3-base": 768,
503
  "norbert3-large": 1024,
 
504
  "paraphrase-multilingual-MiniLM-L12-v2": 384,
505
  "paraphrase-multilingual-mpnet-base-v2": 768,
 
 
 
506
  "sentence-bert-swedish-cased": 768,
507
  "sentence-t5-base": 768,
508
  "sentence-t5-large": 768,
509
  "sentence-t5-xl": 768,
510
  "sentence-t5-xxl": 768,
 
511
  "sup-simcse-bert-base-uncased": 768,
512
  "st-polish-paraphrase-from-distilroberta": 768,
513
  "st-polish-paraphrase-from-mpnet": 768,
@@ -528,8 +653,14 @@ EXTERNAL_MODEL_TO_DIM = {
528
  "text-search-curie-001": 4096,
529
  "text-search-davinci-001": 12288,
530
  "titan-embed-text-v1": 1536,
 
 
 
 
531
  "unsup-simcse-bert-base-uncased": 768,
532
  "use-cmlm-multilingual": 768,
 
 
533
  "voyage-lite-01-instruct": 1024,
534
  "voyage-lite-02-instruct": 1024,
535
  "xlm-roberta-base": 768,
@@ -537,28 +668,46 @@ EXTERNAL_MODEL_TO_DIM = {
537
  }
538
 
539
  EXTERNAL_MODEL_TO_SEQLEN = {
 
 
540
  "all-MiniLM-L12-v2": 512,
541
  "all-MiniLM-L6-v2": 512,
542
  "all-mpnet-base-v2": 514,
543
  "allenai-specter": 512,
544
  "Baichuan-text-embedding": 512,
 
 
 
 
 
545
  "bert-base-swedish-cased": 512,
546
  "bert-base-uncased": 512,
547
  "bge-base-zh-v1.5": 512,
548
  "bge-large-zh-v1.5": 512,
549
  "bge-large-zh-noinstruct": 512,
550
- "bge-small-zh-v1.5": 512,
 
 
551
  "contriever-base-msmarco": 512,
552
  "cross-en-de-roberta-sentence-transformer": 514,
 
 
 
 
 
553
  "DanskBERT": 514,
554
  "dfm-encoder-large-v1": 512,
555
  "dfm-sentence-encoder-large-1": 512,
556
  "distiluse-base-multilingual-cased-v2": 512,
557
  "e5-base": 512,
558
  "e5-large": 512,
 
559
  "e5-small": 512,
560
  "electra-small-nordic": 512,
561
  "electra-small-swedish-cased-discriminator": 512,
 
 
 
562
  "gbert-base": 512,
563
  "gbert-large": 512,
564
  "gelectra-base": 512,
@@ -575,8 +724,10 @@ EXTERNAL_MODEL_TO_SEQLEN = {
575
  "LASER2": "N/A",
576
  "LaBSE": 512,
577
  "m3e-base": 512,
578
- "m3e-large": 512,
 
579
  "msmarco-bert-co-condensor": 512,
 
580
  "multilingual-e5-base": 514,
581
  "multilingual-e5-large": 514,
582
  "multilingual-e5-small": 512,
@@ -588,13 +739,18 @@ EXTERNAL_MODEL_TO_SEQLEN = {
588
  "nomic-embed-text-v1.5-512": 8192,
589
  "norbert3-base": 512,
590
  "norbert3-large": 512,
 
591
  "paraphrase-multilingual-MiniLM-L12-v2": 512,
592
  "paraphrase-multilingual-mpnet-base-v2": 514,
 
 
 
593
  "sentence-bert-swedish-cased": 512,
594
  "sentence-t5-base": 512,
595
  "sentence-t5-large": 512,
596
  "sentence-t5-xl": 512,
597
  "sentence-t5-xxl": 512,
 
598
  "sup-simcse-bert-base-uncased": 512,
599
  "st-polish-paraphrase-from-distilroberta": 514,
600
  "st-polish-paraphrase-from-mpnet": 514,
@@ -615,8 +771,14 @@ EXTERNAL_MODEL_TO_SEQLEN = {
615
  "text-search-curie-001": 2046,
616
  "text-search-davinci-001": 2046,
617
  "titan-embed-text-v1": 8000,
 
 
 
 
618
  "use-cmlm-multilingual": 512,
619
  "unsup-simcse-bert-base-uncased": 512,
 
 
620
  "voyage-lite-01-instruct": 4000,
621
  "voyage-lite-02-instruct": 4000,
622
  "xlm-roberta-base": 514,
@@ -628,23 +790,39 @@ EXTERNAL_MODEL_TO_SIZE = {
628
  "all-MiniLM-L12-v2": 0.13,
629
  "all-MiniLM-L6-v2": 0.09,
630
  "all-mpnet-base-v2": 0.44,
 
 
 
 
 
631
  "bert-base-uncased": 0.44,
632
  "bert-base-swedish-cased": 0.50,
633
  "bge-base-zh-v1.5": 0.41,
634
  "bge-large-zh-v1.5": 1.30,
635
  "bge-large-zh-noinstruct": 1.30,
636
- "bge-small-zh-v1.5": 0.10,
 
 
637
  "cross-en-de-roberta-sentence-transformer": 1.11,
638
  "contriever-base-msmarco": 0.44,
 
 
 
 
 
639
  "DanskBERT": 0.50,
640
  "distiluse-base-multilingual-cased-v2": 0.54,
641
  "dfm-encoder-large-v1": 1.42,
642
  "dfm-sentence-encoder-large-1": 1.63,
643
  "e5-base": 0.44,
644
- "e5-small": 0.13,
645
  "e5-large": 1.34,
 
 
646
  "electra-small-nordic": 0.09,
647
  "electra-small-swedish-cased-discriminator": 0.06,
 
 
 
648
  "gbert-base": 0.44,
649
  "gbert-large": 1.35,
650
  "gelectra-base": 0.44,
@@ -663,6 +841,7 @@ EXTERNAL_MODEL_TO_SIZE = {
663
  "m3e-base": 0.41,
664
  "m3e-large": 0.41,
665
  "msmarco-bert-co-condensor": 0.44,
 
666
  "multilingual-e5-base": 1.11,
667
  "multilingual-e5-small": 0.47,
668
  "multilingual-e5-large": 2.24,
@@ -676,11 +855,15 @@ EXTERNAL_MODEL_TO_SIZE = {
676
  "norbert3-large": 1.47,
677
  "paraphrase-multilingual-mpnet-base-v2": 1.11,
678
  "paraphrase-multilingual-MiniLM-L12-v2": 0.47,
 
 
 
679
  "sentence-bert-swedish-cased": 0.50,
680
  "sentence-t5-base": 0.22,
681
  "sentence-t5-large": 0.67,
682
  "sentence-t5-xl": 2.48,
683
  "sentence-t5-xxl": 9.73,
 
684
  "sup-simcse-bert-base-uncased": 0.44,
685
  "st-polish-paraphrase-from-distilroberta": 0.50,
686
  "st-polish-paraphrase-from-mpnet": 0.50,
@@ -807,16 +990,9 @@ MODELS_TO_SKIP = {
807
  "atian-chapters/Chapters-SFR-Embedding-Mistral", # Copy
808
  "rlsChapters/Chapters-SFR-Embedding-Mistral", # Copy
809
  "TitanML/jina-v2-base-en-embed", # Copy
810
- "MaziyarPanahi/GritLM-8x7B-GGUF", # GGUF variant
811
  }
812
 
813
-
814
- if os.path.exists("EXTERNAL_MODEL_RESULTS.json"):
815
- with open("EXTERNAL_MODEL_RESULTS.json") as f:
816
- EXTERNAL_MODEL_RESULTS = json.load(f)
817
- else:
818
- EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
819
-
820
  def add_lang(examples):
821
  if not(examples["eval_language"]):
822
  examples["mteb_dataset_name_with_lang"] = examples["mteb_dataset_name"]
@@ -824,48 +1000,62 @@ def add_lang(examples):
824
  examples["mteb_dataset_name_with_lang"] = examples["mteb_dataset_name"] + f' ({examples["eval_language"]})'
825
  return examples
826
 
 
 
827
  def add_task(examples):
828
  # Could be added to the dataset loading script instead
829
- if examples["mteb_dataset_name"] in TASK_LIST_CLASSIFICATION_NORM + TASK_LIST_CLASSIFICATION_DA + TASK_LIST_CLASSIFICATION_NB + TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLASSIFICATION_SV + TASK_LIST_CLASSIFICATION_ZH:
830
  examples["mteb_task"] = "Classification"
831
- elif examples["mteb_dataset_name"] in TASK_LIST_CLUSTERING + TASK_LIST_CLUSTERING_DE + TASK_LIST_CLUSTERING_PL + TASK_LIST_CLUSTERING_ZH:
832
  examples["mteb_task"] = "Clustering"
833
- elif examples["mteb_dataset_name"] in TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_PAIR_CLASSIFICATION_ZH:
834
  examples["mteb_task"] = "PairClassification"
835
- elif examples["mteb_dataset_name"] in TASK_LIST_RERANKING + TASK_LIST_RERANKING_ZH:
836
  examples["mteb_task"] = "Reranking"
837
- elif examples["mteb_dataset_name"] in TASK_LIST_RETRIEVAL_NORM + TASK_LIST_RETRIEVAL_PL + TASK_LIST_RETRIEVAL_ZH:
838
  examples["mteb_task"] = "Retrieval"
839
- elif examples["mteb_dataset_name"] in TASK_LIST_STS_NORM + TASK_LIST_STS_PL + TASK_LIST_STS_ZH:
840
  examples["mteb_task"] = "STS"
841
- elif examples["mteb_dataset_name"] in TASK_LIST_SUMMARIZATION:
842
  examples["mteb_task"] = "Summarization"
843
- elif examples["mteb_dataset_name"] in [x.split(" ")[0] for x in TASK_LIST_BITEXT_MINING + TASK_LIST_BITEXT_MINING_OTHER]:
844
  examples["mteb_task"] = "BitextMining"
845
  else:
846
  print("WARNING: Task not found for dataset", examples["mteb_dataset_name"])
847
  examples["mteb_task"] = "Unknown"
848
  return examples
849
 
850
- if not(os.path.exists("EXTERNAL_MODEL_RESULTS.json")):
851
- pbar = tqdm(EXTERNAL_MODELS, desc="Fetching external model results")
852
- for model in pbar:
853
- pbar.set_description(f"Fetching external model results for {model!r}")
854
- ds = load_dataset("mteb/results", model, trust_remote_code=True)
855
- # For local debugging:
856
- #, download_mode='force_redownload', verification_mode="no_checks")
857
- ds = ds.map(add_lang)
858
- ds = ds.map(add_task)
859
- base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))}
860
- # For now only one metric per task - Could add more metrics lateron
861
- for task, metric in TASK_TO_METRIC.items():
862
- ds_dict = ds.filter(lambda x: (x["mteb_task"] == task) and (x["metric"] == metric))["test"].to_dict()
863
- ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
864
- EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})
865
-
866
- # Save & cache EXTERNAL_MODEL_RESULTS
867
- with open("EXTERNAL_MODEL_RESULTS.json", "w") as f:
868
- json.dump(EXTERNAL_MODEL_RESULTS, f)
 
 
 
 
 
 
 
 
 
 
 
 
869
 
870
  def get_dim_seq_size(model):
871
  filenames = [sib.rfilename for sib in model.siblings]
@@ -1136,6 +1326,68 @@ def get_mteb_average_zh():
1136
 
1137
  return DATA_OVERALL_ZH
1138
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1139
  def get_mteb_average_pl():
1140
  global DATA_OVERALL_PL, DATA_CLASSIFICATION_PL, DATA_CLUSTERING_PL, DATA_PAIR_CLASSIFICATION_PL, DATA_RETRIEVAL_PL, DATA_STS_PL
1141
  DATA_OVERALL_PL = get_mteb_data(
@@ -1191,6 +1443,7 @@ def get_mteb_average_pl():
1191
  return DATA_OVERALL_PL
1192
 
1193
  get_mteb_average()
 
1194
  get_mteb_average_pl()
1195
  get_mteb_average_zh()
1196
  DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
@@ -1212,6 +1465,7 @@ for d in [
1212
  DATA_BITEXT_MINING_OTHER,
1213
  DATA_CLASSIFICATION_EN,
1214
  DATA_CLASSIFICATION_DA,
 
1215
  DATA_CLASSIFICATION_NB,
1216
  DATA_CLASSIFICATION_PL,
1217
  DATA_CLASSIFICATION_SV,
@@ -1219,21 +1473,27 @@ for d in [
1219
  DATA_CLASSIFICATION_OTHER,
1220
  DATA_CLUSTERING,
1221
  DATA_CLUSTERING_DE,
 
1222
  DATA_CLUSTERING_PL,
1223
  DATA_CLUSTERING_ZH,
1224
  DATA_PAIR_CLASSIFICATION,
 
1225
  DATA_PAIR_CLASSIFICATION_PL,
1226
  DATA_PAIR_CLASSIFICATION_ZH,
1227
  DATA_RERANKING,
 
1228
  DATA_RERANKING_ZH,
1229
  DATA_RETRIEVAL,
 
1230
  DATA_RETRIEVAL_PL,
1231
  DATA_RETRIEVAL_ZH,
1232
  DATA_STS_EN,
 
1233
  DATA_STS_PL,
1234
  DATA_STS_ZH,
1235
  DATA_STS_OTHER,
1236
  DATA_SUMMARIZATION,
 
1237
  ]:
1238
  # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum()
1239
  cols_to_ignore = 3 if "Average" in d.columns else 2
@@ -1308,7 +1568,26 @@ with block:
1308
  )
1309
  with gr.Row():
1310
  data_run_overall_zh = gr.Button("Refresh")
1311
- data_run_overall_zh.click(get_mteb_average_zh, inputs=None, outputs=data_overall_zh)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1312
  with gr.TabItem("Polish"):
1313
  with gr.Row():
1314
  gr.Markdown("""
@@ -1433,6 +1712,27 @@ with block:
1433
  partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_DA),
1434
  outputs=data_run_classification_da,
1435
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1436
  with gr.TabItem("Norwegian"):
1437
  with gr.Row():
1438
  gr.Markdown("""
@@ -1558,6 +1858,27 @@ with block:
1558
  partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_ZH),
1559
  outputs=data_clustering_zh,
1560
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1561
  with gr.TabItem("German"):
1562
  with gr.Row():
1563
  gr.Markdown("""
@@ -1642,6 +1963,27 @@ with block:
1642
  partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_ZH),
1643
  outputs=data_pair_classification_zh,
1644
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1645
  with gr.TabItem("Polish"):
1646
  with gr.Row():
1647
  gr.Markdown("""
@@ -1705,6 +2047,27 @@ with block:
1705
  partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_ZH),
1706
  outputs=data_reranking_zh,
1707
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1708
  with gr.TabItem("Retrieval"):
1709
  with gr.TabItem("English"):
1710
  with gr.Row():
@@ -1737,18 +2100,40 @@ with block:
1737
  - **Credits:** [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)
1738
  """)
1739
  with gr.Row():
1740
- data_retrieval_zh = gr.components.Dataframe(
1741
- DATA_RETRIEVAL_ZH,
1742
  # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
1743
- datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL_ZH.columns) * 2,
1744
  type="pandas",
1745
  )
1746
  with gr.Row():
1747
- data_run_retrieval_zh = gr.Button("Refresh")
1748
- data_run_retrieval_zh.click(
1749
- partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_ZH),
1750
- outputs=data_retrieval_zh,
1751
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1752
  with gr.TabItem("Polish"):
1753
  with gr.Row():
1754
  gr.Markdown("""
@@ -1813,6 +2198,27 @@ with block:
1813
  partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_ZH),
1814
  outputs=data_sts_zh,
1815
  )
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1816
  with gr.TabItem("Polish"):
1817
  with gr.Row():
1818
  gr.Markdown("""
 
38
  "TweetSentimentExtractionClassification",
39
  ]
40
 
 
 
41
  TASK_LIST_CLASSIFICATION_DA = [
42
  "AngryTweetsClassification",
43
  "DanishPoliticalCommentsClassification",
 
49
  "ScalaDaClassification",
50
  ]
51
 
52
+ TASK_LIST_CLASSIFICATION_FR = [
53
+ "AmazonReviewsClassification (fr)",
54
+ "MasakhaNEWSClassification (fra)",
55
+ "MassiveIntentClassification (fr)",
56
+ "MassiveScenarioClassification (fr)",
57
+ "MTOPDomainClassification (fr)",
58
+ "MTOPIntentClassification (fr)",
59
+ ]
60
+
61
  TASK_LIST_CLASSIFICATION_NB = [
62
  "NoRecClassification",
63
  "NordicLangClassification",
 
122
  "TenKGnadClusteringS2S",
123
  ]
124
 
125
+ TASK_LIST_CLUSTERING_FR = [
126
+ "AlloProfClusteringP2P",
127
+ "AlloProfClusteringS2S",
128
+ "HALClusteringS2S",
129
+ "MLSUMClusteringP2P",
130
+ "MLSUMClusteringS2S",
131
+ "MasakhaNEWSClusteringP2P (fra)",
132
+ "MasakhaNEWSClusteringS2S (fra)",
133
+ ]
134
+
135
  TASK_LIST_CLUSTERING_PL = [
136
  "8TagsClustering",
137
  ]
 
149
  "TwitterURLCorpus",
150
  ]
151
 
152
+ TASK_LIST_PAIR_CLASSIFICATION_FR = [
153
+ "OpusparcusPC (fr)",
154
+ "PawsX (fr)",
155
+ ]
156
+
157
  TASK_LIST_PAIR_CLASSIFICATION_PL = [
158
  "CDSC-E",
159
  "PPC",
 
173
  "StackOverflowDupQuestions",
174
  ]
175
 
176
+ TASK_LIST_RERANKING_FR = [
177
+ "AlloprofReranking",
178
+ "SyntecReranking",
179
+ ]
180
+
181
  TASK_LIST_RERANKING_ZH = [
182
  "CMedQAv1",
183
  "CMedQAv2",
 
203
  "TRECCOVID",
204
  ]
205
 
206
+ TASK_LIST_RETRIEVAL_FR = [
207
+ "AlloprofRetrieval",
208
+ "BSARDRetrieval",
209
+ "MintakaRetrieval (fr)",
210
+ # "MultiLongDocRetrieval",
211
+ "SyntecRetrieval",
212
+ "XPQARetrieval (fr)",
213
+ ]
214
+
215
  TASK_LIST_RETRIEVAL_PL = [
216
  "ArguAna-PL",
217
  "DBPedia-PL",
 
265
  "STSBenchmark",
266
  ]
267
 
268
+ TASK_LIST_STS_FR = [
269
+ "STS22 (fr)",
270
+ "STSBenchmarkMultilingualSTS (fr)",
271
+ "SICKFr",
272
+ ]
273
+
274
  TASK_LIST_STS_PL = [
275
  "CDSC-R",
276
  "SICK-R-PL",
 
289
  ]
290
 
291
  TASK_LIST_STS_OTHER = ["STS17 (ar-ar)", "STS17 (en-ar)", "STS17 (en-de)", "STS17 (en-tr)", "STS17 (es-en)", "STS17 (es-es)", "STS17 (fr-en)", "STS17 (it-en)", "STS17 (ko-ko)", "STS17 (nl-en)", "STS22 (ar)", "STS22 (de)", "STS22 (de-en)", "STS22 (de-fr)", "STS22 (de-pl)", "STS22 (es)", "STS22 (es-en)", "STS22 (es-it)", "STS22 (fr)", "STS22 (fr-pl)", "STS22 (it)", "STS22 (pl)", "STS22 (pl-en)", "STS22 (ru)", "STS22 (tr)", "STS22 (zh-en)", "STSBenchmark",]
 
292
 
293
  TASK_LIST_SUMMARIZATION = ["SummEval",]
294
 
295
+ TASK_LIST_SUMMARIZATION_FR = ["SummEvalFr"]
296
+
297
  TASK_LIST_EN = TASK_LIST_CLASSIFICATION + TASK_LIST_CLUSTERING + TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_RERANKING + TASK_LIST_RETRIEVAL + TASK_LIST_STS + TASK_LIST_SUMMARIZATION
298
+ TASK_LIST_FR = TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLUSTERING_FR + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_RERANKING_FR + TASK_LIST_RETRIEVAL_FR + TASK_LIST_STS_FR + TASK_LIST_SUMMARIZATION_FR
299
  TASK_LIST_PL = TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLUSTERING_PL + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_RETRIEVAL_PL + TASK_LIST_STS_PL
300
  TASK_LIST_ZH = TASK_LIST_CLASSIFICATION_ZH + TASK_LIST_CLUSTERING_ZH + TASK_LIST_PAIR_CLASSIFICATION_ZH + TASK_LIST_RERANKING_ZH + TASK_LIST_RETRIEVAL_ZH + TASK_LIST_STS_ZH
301
 
 
320
 
321
  # Models without metadata, thus we cannot fetch their results naturally
322
  EXTERNAL_MODELS = [
323
+ "Baichuan-text-embedding",
324
+ "Cohere-embed-multilingual-v3.0",
325
+ "Cohere-embed-multilingual-light-v3.0",
326
+ "DanskBERT",
327
+ "LASER2",
328
+ "LaBSE",
329
+ "OpenSearch-text-hybrid",
330
  "all-MiniLM-L12-v2",
331
  "all-MiniLM-L6-v2",
332
  "all-mpnet-base-v2",
333
  "allenai-specter",
334
+ "bert-base-10lang-cased",
335
+ "bert-base-15lang-cased",
336
+ "bert-base-25lang-cased",
337
+ "bert-base-multilingual-cased",
338
+ "bert-base-multilingual-uncased",
339
  "bert-base-swedish-cased",
340
  "bert-base-uncased",
341
  "bge-base-zh-v1.5",
 
347
  "dfm-encoder-large-v1",
348
  "dfm-sentence-encoder-large-1",
349
  "distiluse-base-multilingual-cased-v2",
 
350
  "e5-base",
351
  "e5-large",
352
+ "e5-mistral-7b-instruct",
353
+ "e5-small",
354
  "electra-small-nordic",
355
  "electra-small-swedish-cased-discriminator",
356
+ "flaubert_base_cased",
357
+ "flaubert_base_uncased",
358
+ "flaubert_large_cased",
359
  "gbert-base",
360
  "gbert-large",
361
  "gelectra-base",
362
  "gelectra-large",
 
363
  "glove.6B.300d",
364
+ "gottbert-base",
365
  "gtr-t5-base",
366
  "gtr-t5-large",
367
  "gtr-t5-xl",
 
369
  "herbert-base-retrieval-v2",
370
  "komninos",
371
  "luotuo-bert-medium",
 
 
372
  "m3e-base",
373
+ "m3e-large",
374
+ "mistral-embed",
375
  "msmarco-bert-co-condensor",
376
+ "multi-qa-MiniLM-L6-cos-v1",
377
  "multilingual-e5-base",
378
  "multilingual-e5-large",
379
  "multilingual-e5-small",
 
388
  "paraphrase-multilingual-MiniLM-L12-v2",
389
  "paraphrase-multilingual-mpnet-base-v2",
390
  "sentence-bert-swedish-cased",
391
+ "sentence-camembert-base",
392
+ "sentence-camembert-large",
393
+ "sentence-croissant-llm-base",
394
  "sentence-t5-base",
395
  "sentence-t5-large",
396
  "sentence-t5-xl",
397
  "sentence-t5-xxl",
398
+ "silver-retriever-base-v1",
399
  "sup-simcse-bert-base-uncased",
400
  "st-polish-paraphrase-from-distilroberta",
401
+ "st-polish-paraphrase-from-mpnet",
402
  "text2vec-base-chinese",
403
+ "text2vec-base-multilingual",
404
  "text2vec-large-chinese",
405
  "text-embedding-3-small",
406
  "text-embedding-3-large",
 
416
  "text-search-curie-001",
417
  "text-search-davinci-001",
418
  "titan-embed-text-v1",
419
+ "udever-bloom-1b1",
420
+ "udever-bloom-560m",
421
+ "universal-sentence-encoder-multilingual-3",
422
+ "universal-sentence-encoder-multilingual-large-3",
423
  "unsup-simcse-bert-base-uncased",
424
  "use-cmlm-multilingual",
425
+ "voyage-2",
426
+ "voyage-code-2",
427
  "voyage-lite-01-instruct",
428
+ "voyage-lite-02-instruct",
429
  "xlm-roberta-base",
430
+ "xlm-roberta-large",
431
  ]
432
 
433
  EXTERNAL_MODEL_TO_LINK = {
434
+ "Cohere-embed-multilingual-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-v3.0",
435
+ "Cohere-embed-multilingual-light-v3.0": "https://huggingface.co/Cohere/Cohere-embed-multilingual-light-v3.0",
436
  "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
437
  "allenai-specter": "https://huggingface.co/sentence-transformers/allenai-specter",
438
  "all-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L12-v2",
439
  "all-MiniLM-L6-v2": "https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2",
440
  "all-mpnet-base-v2": "https://huggingface.co/sentence-transformers/all-mpnet-base-v2",
441
  "Baichuan-text-embedding": "https://platform.baichuan-ai.com/docs/text-Embedding",
442
+ "bert-base-10lang-cased": "https://huggingface.co/Geotrend/bert-base-10lang-cased",
443
+ "bert-base-15lang-cased": "https://huggingface.co/Geotrend/bert-base-15lang-cased",
444
+ "bert-base-25lang-cased": "https://huggingface.co/Geotrend/bert-base-25lang-cased",
445
+ "bert-base-multilingual-cased": "https://huggingface.co/google-bert/bert-base-multilingual-cased",
446
+ "bert-base-multilingual-uncased": "https://huggingface.co/google-bert/bert-base-multilingual-uncased",
447
  "bert-base-swedish-cased": "https://huggingface.co/KB/bert-base-swedish-cased",
448
  "bert-base-uncased": "https://huggingface.co/bert-base-uncased",
449
  "bge-base-zh-v1.5": "https://huggingface.co/BAAI/bge-base-zh-v1.5",
450
  "bge-large-zh-v1.5": "https://huggingface.co/BAAI/bge-large-zh-v1.5",
451
  "bge-large-zh-noinstruct": "https://huggingface.co/BAAI/bge-large-zh-noinstruct",
452
  "bge-small-zh-v1.5": "https://huggingface.co/BAAI/bge-small-zh-v1.5",
453
+ "camembert-base": "https://huggingface.co/almanach/camembert-base",
454
+ "camembert-large": "https://huggingface.co/almanach/camembert-large",
455
  "contriever-base-msmarco": "https://huggingface.co/nthakur/contriever-base-msmarco",
456
  "cross-en-de-roberta-sentence-transformer": "https://huggingface.co/T-Systems-onsite/cross-en-de-roberta-sentence-transformer",
457
  "DanskBERT": "https://huggingface.co/vesteinn/DanskBERT",
458
+ "distilbert-base-25lang-cased": "https://huggingface.co/Geotrend/distilbert-base-25lang-cased",
459
+ "distilbert-base-en-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-cased",
460
+ "distilbert-base-en-fr-es-pt-it-cased": "https://huggingface.co/Geotrend/distilbert-base-en-fr-es-pt-it-cased",
461
+ "distilbert-base-fr-cased": "https://huggingface.co/Geotrend/distilbert-base-fr-cased",
462
+ "distilbert-base-uncased": "https://huggingface.co/distilbert-base-uncased",
463
  "distiluse-base-multilingual-cased-v2": "https://huggingface.co/sentence-transformers/distiluse-base-multilingual-cased-v2",
464
  "dfm-encoder-large-v1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
465
  "dfm-sentence-encoder-large-1": "https://huggingface.co/chcaa/dfm-encoder-large-v1",
466
  "e5-base": "https://huggingface.co/intfloat/e5-base",
467
  "e5-large": "https://huggingface.co/intfloat/e5-large",
468
+ "e5-mistral-7b-instruct": "https://huggingface.co/intfloat/e5-mistral-7b-instruct",
469
  "e5-small": "https://huggingface.co/intfloat/e5-small",
470
  "electra-small-nordic": "https://huggingface.co/jonfd/electra-small-nordic",
471
  "electra-small-swedish-cased-discriminator": "https://huggingface.co/KBLab/electra-small-swedish-cased-discriminator",
472
+ "flaubert_base_cased": "https://huggingface.co/flaubert/flaubert_base_cased",
473
+ "flaubert_base_uncased": "https://huggingface.co/flaubert/flaubert_base_uncased",
474
+ "flaubert_large_cased": "https://huggingface.co/flaubert/flaubert_large_cased",
475
  "gbert-base": "https://huggingface.co/deepset/gbert-base",
476
  "gbert-large": "https://huggingface.co/deepset/gbert-large",
477
  "gelectra-base": "https://huggingface.co/deepset/gelectra-base",
 
489
  "LaBSE": "https://huggingface.co/sentence-transformers/LaBSE",
490
  "m3e-base": "https://huggingface.co/moka-ai/m3e-base",
491
  "m3e-large": "https://huggingface.co/moka-ai/m3e-large",
492
+ "mistral-embed": "https://docs.mistral.ai/guides/embeddings",
493
  "msmarco-bert-co-condensor": "https://huggingface.co/sentence-transformers/msmarco-bert-co-condensor",
494
+ "multi-qa-MiniLM-L6-cos-v1": "https://huggingface.co/sentence-transformers/multi-qa-MiniLM-L6-cos-v1",
495
  "multilingual-e5-base": "https://huggingface.co/intfloat/multilingual-e5-base",
496
  "multilingual-e5-large": "https://huggingface.co/intfloat/multilingual-e5-large",
497
  "multilingual-e5-small": "https://huggingface.co/intfloat/multilingual-e5-small",
 
503
  "nomic-embed-text-v1.5-512": "https://huggingface.co/nomic-ai/nomic-embed-text-v1.5",
504
  "norbert3-base": "https://huggingface.co/ltg/norbert3-base",
505
  "norbert3-large": "https://huggingface.co/ltg/norbert3-large",
506
+ "OpenSearch-text-hybrid": "https://help.aliyun.com/zh/open-search/vector-search-edition/hybrid-retrieval",
507
  "paraphrase-multilingual-mpnet-base-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-mpnet-base-v2",
508
  "paraphrase-multilingual-MiniLM-L12-v2": "https://huggingface.co/sentence-transformers/paraphrase-multilingual-MiniLM-L12-v2",
509
+ "sentence-camembert-base": "https://huggingface.co/dangvantuan/sentence-camembert-base",
510
+ "sentence-camembert-large": "https://huggingface.co/dangvantuan/sentence-camembert-large",
511
+ "sentence-croissant-llm-base": "https://huggingface.co/Wissam42/sentence-croissant-llm-base",
512
  "sentence-bert-swedish-cased": "https://huggingface.co/KBLab/sentence-bert-swedish-cased",
513
  "sentence-t5-base": "https://huggingface.co/sentence-transformers/sentence-t5-base",
514
  "sentence-t5-large": "https://huggingface.co/sentence-transformers/sentence-t5-large",
515
  "sentence-t5-xl": "https://huggingface.co/sentence-transformers/sentence-t5-xl",
516
  "sentence-t5-xxl": "https://huggingface.co/sentence-transformers/sentence-t5-xxl",
517
+ "silver-retriever-base-v1": "https://huggingface.co/ipipan/silver-retriever-base-v1",
518
  "sup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/sup-simcse-bert-base-uncased",
519
  "st-polish-paraphrase-from-distilroberta": "https://huggingface.co/sdadas/st-polish-paraphrase-from-distilroberta",
520
  "st-polish-paraphrase-from-mpnet": "https://huggingface.co/sdadas/st-polish-paraphrase-from-mpnet",
 
535
  "text-search-babbage-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
536
  "text-search-davinci-001": "https://openai.com/blog/introducing-text-and-code-embeddings",
537
  "titan-embed-text-v1": "https://docs.aws.amazon.com/bedrock/latest/userguide/embeddings.html",
538
+ "udever-bloom-1b1": "https://huggingface.co/izhx/udever-bloom-1b1",
539
+ "udever-bloom-560m": "https://huggingface.co/izhx/udever-bloom-560m",
540
+ "universal-sentence-encoder-multilingual-3": "https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-3",
541
+ "universal-sentence-encoder-multilingual-large-3": "https://huggingface.co/vprelovac/universal-sentence-encoder-multilingual-large-3",
542
  "unsup-simcse-bert-base-uncased": "https://huggingface.co/princeton-nlp/unsup-simcse-bert-base-uncased",
543
  "use-cmlm-multilingual": "https://huggingface.co/sentence-transformers/use-cmlm-multilingual",
544
+ "voyage-2": "https://docs.voyageai.com/embeddings/",
545
+ "voyage-code-2": "https://docs.voyageai.com/embeddings/",
546
  "voyage-lite-01-instruct": "https://docs.voyageai.com/embeddings/",
547
  "voyage-lite-02-instruct": "https://docs.voyageai.com/embeddings/",
548
  "xlm-roberta-base": "https://huggingface.co/xlm-roberta-base",
 
550
  }
551
 
552
  EXTERNAL_MODEL_TO_DIM = {
553
+ "Cohere-embed-multilingual-v3.0": 1024,
554
+ "Cohere-embed-multilingual-light-v3.0": 384,
555
  "all-MiniLM-L12-v2": 384,
556
  "all-MiniLM-L6-v2": 384,
557
  "all-mpnet-base-v2": 768,
558
  "allenai-specter": 768,
559
  "Baichuan-text-embedding": 1024,
560
+ "bert-base-10lang-cased": 768,
561
+ "bert-base-15lang-cased": 768,
562
+ "bert-base-25lang-cased": 768,
563
+ "bert-base-multilingual-cased": 768,
564
+ "bert-base-multilingual-uncased": 768,
565
  "bert-base-swedish-cased": 768,
566
  "bert-base-uncased": 768,
567
  "bge-base-zh-v1.5": 768,
568
  "bge-large-zh-v1.5": 1024,
569
  "bge-large-zh-noinstruct": 1024,
570
  "bge-small-zh-v1.5": 512,
571
+ "camembert-base": 512,
572
+ "camembert-large": 768,
573
  "contriever-base-msmarco": 768,
574
  "cross-en-de-roberta-sentence-transformer": 768,
575
  "DanskBERT": 768,
576
+ "distilbert-base-25lang-cased": 768,
577
+ "distilbert-base-en-fr-cased": 768,
578
+ "distilbert-base-en-fr-es-pt-it-cased": 768,
579
+ "distilbert-base-fr-cased": 768,
580
+ "distilbert-base-uncased": 768,
581
  "distiluse-base-multilingual-cased-v2": 512,
582
  "dfm-encoder-large-v1": 1024,
583
  "dfm-sentence-encoder-large-1": 1024,
584
  "e5-base": 768,
585
+ "e5-large": 1024,
586
+ "e5-mistral-7b-instruct": 4096,
587
  "e5-small": 384,
 
588
  "electra-small-nordic": 256,
589
  "electra-small-swedish-cased-discriminator": 256,
590
+ "flaubert_base_cased": 768,
591
+ "flaubert_base_uncased": 768,
592
+ "flaubert_large_cased": 1024,
593
  "luotuo-bert-medium": 768,
594
  "LASER2": 1024,
595
  "LaBSE": 768,
 
607
  "komninos": 300,
608
  "m3e-base": 768,
609
  "m3e-large": 768,
610
+ "mistral-embed": 1024,
611
  "msmarco-bert-co-condensor": 768,
612
+ "multi-qa-MiniLM-L6-cos-v1": 384,
613
  "multilingual-e5-base": 768,
614
  "multilingual-e5-small": 384,
615
  "multilingual-e5-large": 1024,
 
621
  "nomic-embed-text-v1.5-512": 512,
622
  "norbert3-base": 768,
623
  "norbert3-large": 1024,
624
+ "OpenSearch-text-hybrid": 1792,
625
  "paraphrase-multilingual-MiniLM-L12-v2": 384,
626
  "paraphrase-multilingual-mpnet-base-v2": 768,
627
+ "sentence-camembert-base": 768,
628
+ "sentence-camembert-large": 1024,
629
+ "sentence-croissant-llm-base": 2048,
630
  "sentence-bert-swedish-cased": 768,
631
  "sentence-t5-base": 768,
632
  "sentence-t5-large": 768,
633
  "sentence-t5-xl": 768,
634
  "sentence-t5-xxl": 768,
635
+ "silver-retriever-base-v1": 768,
636
  "sup-simcse-bert-base-uncased": 768,
637
  "st-polish-paraphrase-from-distilroberta": 768,
638
  "st-polish-paraphrase-from-mpnet": 768,
 
653
  "text-search-curie-001": 4096,
654
  "text-search-davinci-001": 12288,
655
  "titan-embed-text-v1": 1536,
656
+ "udever-bloom-1b1": 1536,
657
+ "udever-bloom-560m": 1024,
658
+ "universal-sentence-encoder-multilingual-3": 512,
659
+ "universal-sentence-encoder-multilingual-large-3": 512,
660
  "unsup-simcse-bert-base-uncased": 768,
661
  "use-cmlm-multilingual": 768,
662
+ "voyage-2": 1024,
663
+ "voyage-code-2": 1536,
664
  "voyage-lite-01-instruct": 1024,
665
  "voyage-lite-02-instruct": 1024,
666
  "xlm-roberta-base": 768,
 
668
  }
669
 
670
  EXTERNAL_MODEL_TO_SEQLEN = {
671
+ "Cohere-embed-multilingual-v3.0": 512,
672
+ "Cohere-embed-multilingual-light-v3.0": 512,
673
  "all-MiniLM-L12-v2": 512,
674
  "all-MiniLM-L6-v2": 512,
675
  "all-mpnet-base-v2": 514,
676
  "allenai-specter": 512,
677
  "Baichuan-text-embedding": 512,
678
+ "bert-base-10lang-cased": 512,
679
+ "bert-base-15lang-cased": 512,
680
+ "bert-base-25lang-cased": 512,
681
+ "bert-base-multilingual-cased": 512,
682
+ "bert-base-multilingual-uncased": 512,
683
  "bert-base-swedish-cased": 512,
684
  "bert-base-uncased": 512,
685
  "bge-base-zh-v1.5": 512,
686
  "bge-large-zh-v1.5": 512,
687
  "bge-large-zh-noinstruct": 512,
688
+ "bge-small-zh-v1.5": 512,
689
+ "camembert-base": 512,
690
+ "camembert-large": 512,
691
  "contriever-base-msmarco": 512,
692
  "cross-en-de-roberta-sentence-transformer": 514,
693
+ "distilbert-base-25lang-cased": 512,
694
+ "distilbert-base-en-fr-cased": 512,
695
+ "distilbert-base-en-fr-es-pt-it-cased": 512,
696
+ "distilbert-base-fr-cased": 512,
697
+ "distilbert-base-uncased": 512,
698
  "DanskBERT": 514,
699
  "dfm-encoder-large-v1": 512,
700
  "dfm-sentence-encoder-large-1": 512,
701
  "distiluse-base-multilingual-cased-v2": 512,
702
  "e5-base": 512,
703
  "e5-large": 512,
704
+ "e5-mistral-7b-instruct": 32768,
705
  "e5-small": 512,
706
  "electra-small-nordic": 512,
707
  "electra-small-swedish-cased-discriminator": 512,
708
+ "flaubert_base_cased": 512,
709
+ "flaubert_base_uncased": 512,
710
+ "flaubert_large_cased": 512,
711
  "gbert-base": 512,
712
  "gbert-large": 512,
713
  "gelectra-base": 512,
 
724
  "LASER2": "N/A",
725
  "LaBSE": 512,
726
  "m3e-base": 512,
727
+ "m3e-large": 512,
728
+ # "mistral-embed": "?",
729
  "msmarco-bert-co-condensor": 512,
730
+ "multi-qa-MiniLM-L6-cos-v1": 512,
731
  "multilingual-e5-base": 514,
732
  "multilingual-e5-large": 514,
733
  "multilingual-e5-small": 512,
 
739
  "nomic-embed-text-v1.5-512": 8192,
740
  "norbert3-base": 512,
741
  "norbert3-large": 512,
742
+ "OpenSearch-text-hybrid": 512,
743
  "paraphrase-multilingual-MiniLM-L12-v2": 512,
744
  "paraphrase-multilingual-mpnet-base-v2": 514,
745
+ "sentence-camembert-base": 512,
746
+ "sentence-camembert-large": 512,
747
+ "sentence-croissant-llm-base": 2048,
748
  "sentence-bert-swedish-cased": 512,
749
  "sentence-t5-base": 512,
750
  "sentence-t5-large": 512,
751
  "sentence-t5-xl": 512,
752
  "sentence-t5-xxl": 512,
753
+ "silver-retriever-base-v1": 514,
754
  "sup-simcse-bert-base-uncased": 512,
755
  "st-polish-paraphrase-from-distilroberta": 514,
756
  "st-polish-paraphrase-from-mpnet": 514,
 
771
  "text-search-curie-001": 2046,
772
  "text-search-davinci-001": 2046,
773
  "titan-embed-text-v1": 8000,
774
+ "udever-bloom-1b1": 2048,
775
+ "udever-bloom-560m": 2048,
776
+ "universal-sentence-encoder-multilingual-3": 512,
777
+ "universal-sentence-encoder-multilingual-large-3": 512,
778
  "use-cmlm-multilingual": 512,
779
  "unsup-simcse-bert-base-uncased": 512,
780
+ "voyage-2": 1024,
781
+ "voyage-code-2": 16000,
782
  "voyage-lite-01-instruct": 4000,
783
  "voyage-lite-02-instruct": 4000,
784
  "xlm-roberta-base": 514,
 
790
  "all-MiniLM-L12-v2": 0.13,
791
  "all-MiniLM-L6-v2": 0.09,
792
  "all-mpnet-base-v2": 0.44,
793
+ "bert-base-10lang-cased": 0.61,
794
+ "bert-base-15lang-cased": 0.61,
795
+ "bert-base-25lang-cased": 0.61,
796
+ "bert-base-multilingual-cased": 0.71,
797
+ "bert-base-multilingual-uncased": 0.67,
798
  "bert-base-uncased": 0.44,
799
  "bert-base-swedish-cased": 0.50,
800
  "bge-base-zh-v1.5": 0.41,
801
  "bge-large-zh-v1.5": 1.30,
802
  "bge-large-zh-noinstruct": 1.30,
803
+ "bge-small-zh-v1.5": 0.10,
804
+ "camembert-base": 0.45,
805
+ "camembert-large": 1.35,
806
  "cross-en-de-roberta-sentence-transformer": 1.11,
807
  "contriever-base-msmarco": 0.44,
808
+ "distilbert-base-25lang-cased": 0.44,
809
+ "distilbert-base-en-fr-cased": 0.44,
810
+ "distilbert-base-en-fr-es-pt-it-cased": 0.44,
811
+ "distilbert-base-fr-cased": 0.44,
812
+ "distilbert-base-uncased": 0.44,
813
  "DanskBERT": 0.50,
814
  "distiluse-base-multilingual-cased-v2": 0.54,
815
  "dfm-encoder-large-v1": 1.42,
816
  "dfm-sentence-encoder-large-1": 1.63,
817
  "e5-base": 0.44,
 
818
  "e5-large": 1.34,
819
+ "e5-mistral-7b-instruct": 14.22,
820
+ "e5-small": 0.13,
821
  "electra-small-nordic": 0.09,
822
  "electra-small-swedish-cased-discriminator": 0.06,
823
+ "flaubert_base_cased": 0.55,
824
+ "flaubert_base_uncased": 0.55,
825
+ "flaubert_large_cased": 1.49,
826
  "gbert-base": 0.44,
827
  "gbert-large": 1.35,
828
  "gelectra-base": 0.44,
 
841
  "m3e-base": 0.41,
842
  "m3e-large": 0.41,
843
  "msmarco-bert-co-condensor": 0.44,
844
+ "multi-qa-MiniLM-L6-cos-v1": 0.09,
845
  "multilingual-e5-base": 1.11,
846
  "multilingual-e5-small": 0.47,
847
  "multilingual-e5-large": 2.24,
 
855
  "norbert3-large": 1.47,
856
  "paraphrase-multilingual-mpnet-base-v2": 1.11,
857
  "paraphrase-multilingual-MiniLM-L12-v2": 0.47,
858
+ "sentence-camembert-base": 0.44,
859
+ "sentence-camembert-large": 1.35,
860
+ "sentence-croissant-llm-base": 5.12,
861
  "sentence-bert-swedish-cased": 0.50,
862
  "sentence-t5-base": 0.22,
863
  "sentence-t5-large": 0.67,
864
  "sentence-t5-xl": 2.48,
865
  "sentence-t5-xxl": 9.73,
866
+ "silver-retriever-base-v1": 0.50,
867
  "sup-simcse-bert-base-uncased": 0.44,
868
  "st-polish-paraphrase-from-distilroberta": 0.50,
869
  "st-polish-paraphrase-from-mpnet": 0.50,
 
990
  "atian-chapters/Chapters-SFR-Embedding-Mistral", # Copy
991
  "rlsChapters/Chapters-SFR-Embedding-Mistral", # Copy
992
  "TitanML/jina-v2-base-en-embed", # Copy
993
+ "MaziyarPanahi/GritLM-8x7B-GGUF", # GGUF variant
994
  }
995
 
 
 
 
 
 
 
 
996
  def add_lang(examples):
997
  if not(examples["eval_language"]):
998
  examples["mteb_dataset_name_with_lang"] = examples["mteb_dataset_name"]
 
1000
  examples["mteb_dataset_name_with_lang"] = examples["mteb_dataset_name"] + f' ({examples["eval_language"]})'
1001
  return examples
1002
 
1003
def norm(names):
    """Return the set of base dataset names found in *names*.

    Each name is cut at its first space, so an entry carrying a suffix such
    as ``"STS22 (fr)"`` contributes ``"STS22"``. Duplicates collapse because
    the result is a set.
    """
    return {name.split(" ", 1)[0] for name in names}
1004
+
1005
# Normalized dataset name -> MTEB task type; filled on first use by _dataset_to_task().
_DATASET_TO_TASK = {}


def _dataset_to_task():
    """Build (once) and return the normalized-dataset-name -> task-type lookup."""
    if not _DATASET_TO_TASK:
        task_groups = [
            ("Classification", TASK_LIST_CLASSIFICATION + TASK_LIST_CLASSIFICATION_DA + TASK_LIST_CLASSIFICATION_FR + TASK_LIST_CLASSIFICATION_NB + TASK_LIST_CLASSIFICATION_PL + TASK_LIST_CLASSIFICATION_SV + TASK_LIST_CLASSIFICATION_ZH),
            ("Clustering", TASK_LIST_CLUSTERING + TASK_LIST_CLUSTERING_DE + TASK_LIST_CLUSTERING_FR + TASK_LIST_CLUSTERING_PL + TASK_LIST_CLUSTERING_ZH),
            ("PairClassification", TASK_LIST_PAIR_CLASSIFICATION + TASK_LIST_PAIR_CLASSIFICATION_FR + TASK_LIST_PAIR_CLASSIFICATION_PL + TASK_LIST_PAIR_CLASSIFICATION_ZH),
            ("Reranking", TASK_LIST_RERANKING + TASK_LIST_RERANKING_FR + TASK_LIST_RERANKING_ZH),
            ("Retrieval", TASK_LIST_RETRIEVAL_NORM + TASK_LIST_RETRIEVAL_FR + TASK_LIST_RETRIEVAL_PL + TASK_LIST_RETRIEVAL_ZH),
            ("STS", TASK_LIST_STS + TASK_LIST_STS_FR + TASK_LIST_STS_PL + TASK_LIST_STS_ZH),
            ("Summarization", TASK_LIST_SUMMARIZATION + TASK_LIST_SUMMARIZATION_FR),
            ("BitextMining", TASK_LIST_BITEXT_MINING + TASK_LIST_BITEXT_MINING_OTHER),
        ]
        for task, names in task_groups:
            for name in norm(names):
                # The first group claiming a name wins, which is the same
                # precedence an if/elif chain over these groups would give.
                _DATASET_TO_TASK.setdefault(name, task)
    return _DATASET_TO_TASK


def add_task(examples):
    """Set ``examples["mteb_task"]`` from ``examples["mteb_dataset_name"]``.

    The dataset name is looked up among the normalized names of the task
    lists. Names that belong to no list are reported on stdout and tagged
    ``"Unknown"``.

    Args:
        examples: mapping with an ``"mteb_dataset_name"`` entry; it is
            modified in place.

    Returns:
        The same mapping, with ``"mteb_task"`` set.
    """
    # Could be added to the dataset loading script instead
    dataset_name = examples["mteb_dataset_name"]
    task = _dataset_to_task().get(dataset_name)
    if task is None:
        print("WARNING: Task not found for dataset", dataset_name)
        task = "Unknown"
    examples["mteb_task"] = task
    return examples
1027
 
1028
# Load previously cached external-model results when the cache file exists;
# only the models absent from it are fetched below.
if os.path.exists("EXTERNAL_MODEL_RESULTS.json"):
    with open("EXTERNAL_MODEL_RESULTS.json") as f:
        EXTERNAL_MODEL_RESULTS = json.load(f)
    # Update with models not contained
    models_to_run = [model for model in EXTERNAL_MODELS if model not in EXTERNAL_MODEL_RESULTS]
    for model in models_to_run:
        EXTERNAL_MODEL_RESULTS[model] = {k: {v: []} for k, v in TASK_TO_METRIC.items()}
else:
    # No cache yet: start with an empty result list per (task, metric) for every model.
    EXTERNAL_MODEL_RESULTS = {model: {k: {v: []} for k, v in TASK_TO_METRIC.items()} for model in EXTERNAL_MODELS}
    models_to_run = EXTERNAL_MODELS

pbar = tqdm(models_to_run, desc="Fetching external model results")
for model in pbar:
    pbar.set_description(f"Fetching external model results for {model!r}")
    ds = load_dataset("mteb/results", model, trust_remote_code=True)
    # For local debugging:
    #, download_mode='force_redownload', verification_mode="no_checks")
    ds = ds.map(add_lang)
    ds = ds.map(add_task)
    # Models without a known link fall back to the leaderboard page.
    base_dict = {"Model": make_clickable_model(model, link=EXTERNAL_MODEL_TO_LINK.get(model, "https://huggingface.co/spaces/mteb/leaderboard"))}
    # For now only one metric per task - Could add more metrics lateron
    for task, metric in TASK_TO_METRIC.items():
        ds_dict = ds.filter(lambda x: (x["mteb_task"] == task) and (x["metric"] == metric))["test"].to_dict()
        ds_dict = {k: round(v, 2) for k, v in zip(ds_dict["mteb_dataset_name_with_lang"], ds_dict["score"])}
        EXTERNAL_MODEL_RESULTS[model][task][metric].append({**base_dict, **ds_dict})

# Save & cache EXTERNAL_MODEL_RESULTS - only when something new was fetched,
# so an up-to-date cache file is not rewritten on every start-up.
if models_to_run:
    with open("EXTERNAL_MODEL_RESULTS.json", "w") as f:
        json.dump(EXTERNAL_MODEL_RESULTS, f)
1059
 
1060
  def get_dim_seq_size(model):
1061
  filenames = [sib.rfilename for sib in model.siblings]
 
1326
 
1327
  return DATA_OVERALL_ZH
1328
 
1329
def get_mteb_average_fr():
    """Rebuild the French (F-MTEB) leaderboard tables.

    Fetches the French results through ``get_mteb_data``, adds an overall
    average plus one average per task type, orders the models by the overall
    average (best first, ranks starting at 1) and derives one table per task
    type. Everything is stored in the module-level ``DATA_*_FR`` variables.

    Returns:
        The overall French leaderboard (``DATA_OVERALL_FR``).
    """
    global DATA_OVERALL_FR, DATA_CLASSIFICATION_FR, DATA_CLUSTERING_FR, DATA_PAIR_CLASSIFICATION_FR, DATA_RERANKING_FR, DATA_RETRIEVAL_FR, DATA_STS_FR, DATA_SUMMARIZATION_FR

    overall_label = f"Average ({len(TASK_LIST_FR)} datasets)"
    # (average-column label, dataset columns) for each task type, in display order.
    task_groups = [
        (f"Classification Average ({len(TASK_LIST_CLASSIFICATION_FR)} datasets)", TASK_LIST_CLASSIFICATION_FR),
        (f"Clustering Average ({len(TASK_LIST_CLUSTERING_FR)} datasets)", TASK_LIST_CLUSTERING_FR),
        (f"Pair Classification Average ({len(TASK_LIST_PAIR_CLASSIFICATION_FR)} datasets)", TASK_LIST_PAIR_CLASSIFICATION_FR),
        (f"Reranking Average ({len(TASK_LIST_RERANKING_FR)} datasets)", TASK_LIST_RERANKING_FR),
        (f"Retrieval Average ({len(TASK_LIST_RETRIEVAL_FR)} datasets)", TASK_LIST_RETRIEVAL_FR),
        (f"STS Average ({len(TASK_LIST_STS_FR)} datasets)", TASK_LIST_STS_FR),
        (f"Summarization Average ({len(TASK_LIST_SUMMARIZATION_FR)} dataset)", TASK_LIST_SUMMARIZATION_FR),
    ]

    DATA_OVERALL_FR = get_mteb_data(
        tasks=[
            "Classification",
            "Clustering",
            "PairClassification",
            "Reranking",
            "Retrieval",
            "STS",
            "Summarization",
        ],
        datasets=[name for _, columns in task_groups for name in columns],
        fillna=False,
        add_emb_dim=True,
        rank=False,
    )
    # Debugging:
    # DATA_OVERALL_FR.to_csv("overall.csv")

    # skipna=False: an average stays NaN unless every dataset in the group has a score.
    DATA_OVERALL_FR.insert(1, overall_label, DATA_OVERALL_FR[TASK_LIST_FR].mean(axis=1, skipna=False))
    for position, (label, columns) in enumerate(task_groups, start=2):
        DATA_OVERALL_FR.insert(position, label, DATA_OVERALL_FR[columns].mean(axis=1, skipna=False))
    DATA_OVERALL_FR.sort_values(overall_label, ascending=False, inplace=True)
    # Start ranking from 1
    DATA_OVERALL_FR.insert(0, "Rank", list(range(1, len(DATA_OVERALL_FR) + 1)))
    DATA_OVERALL_FR = DATA_OVERALL_FR.round(2)

    def task_table(columns, first_checked_col):
        # Rank the models on one task's datasets, then keep only the rows that
        # have a non-empty value somewhere from `first_checked_col` onwards.
        table = add_rank(DATA_OVERALL_FR[["Model"] + columns])
        return table[table.iloc[:, first_checked_col:].ne("").any(axis=1)]

    DATA_CLASSIFICATION_FR = task_table(TASK_LIST_CLASSIFICATION_FR, 2)
    DATA_CLUSTERING_FR = task_table(TASK_LIST_CLUSTERING_FR, 2)
    DATA_PAIR_CLASSIFICATION_FR = task_table(TASK_LIST_PAIR_CLASSIFICATION_FR, 2)
    DATA_RERANKING_FR = task_table(TASK_LIST_RERANKING_FR, 2)
    DATA_RETRIEVAL_FR = task_table(TASK_LIST_RETRIEVAL_FR, 2)
    DATA_STS_FR = task_table(TASK_LIST_STS_FR, 2)
    # NOTE(review): Summarization checks from column 1 while every other task
    # checks from column 2 - confirm against add_rank's column layout whether
    # this difference is intended.
    DATA_SUMMARIZATION_FR = task_table(TASK_LIST_SUMMARIZATION_FR, 1)

    # Fill NaN after averaging
    DATA_OVERALL_FR.fillna("", inplace=True)

    summary_columns = ["Rank", "Model", "Model Size (GB)", "Embedding Dimensions", "Max Tokens", overall_label]
    summary_columns += [label for label, _ in task_groups]
    DATA_OVERALL_FR = DATA_OVERALL_FR[summary_columns]
    # Keep only models that have at least one average filled in.
    DATA_OVERALL_FR = DATA_OVERALL_FR[DATA_OVERALL_FR.iloc[:, 5:].ne("").any(axis=1)]

    return DATA_OVERALL_FR
1390
+
1391
  def get_mteb_average_pl():
1392
  global DATA_OVERALL_PL, DATA_CLASSIFICATION_PL, DATA_CLUSTERING_PL, DATA_PAIR_CLASSIFICATION_PL, DATA_RETRIEVAL_PL, DATA_STS_PL
1393
  DATA_OVERALL_PL = get_mteb_data(
 
1443
  return DATA_OVERALL_PL
1444
 
1445
  get_mteb_average()
1446
+ get_mteb_average_fr()
1447
  get_mteb_average_pl()
1448
  get_mteb_average_zh()
1449
  DATA_BITEXT_MINING = get_mteb_data(["BitextMining"], [], TASK_LIST_BITEXT_MINING)
 
1465
  DATA_BITEXT_MINING_OTHER,
1466
  DATA_CLASSIFICATION_EN,
1467
  DATA_CLASSIFICATION_DA,
1468
+ DATA_CLASSIFICATION_FR,
1469
  DATA_CLASSIFICATION_NB,
1470
  DATA_CLASSIFICATION_PL,
1471
  DATA_CLASSIFICATION_SV,
 
1473
  DATA_CLASSIFICATION_OTHER,
1474
  DATA_CLUSTERING,
1475
  DATA_CLUSTERING_DE,
1476
+ DATA_CLUSTERING_FR,
1477
  DATA_CLUSTERING_PL,
1478
  DATA_CLUSTERING_ZH,
1479
  DATA_PAIR_CLASSIFICATION,
1480
+ DATA_PAIR_CLASSIFICATION_FR,
1481
  DATA_PAIR_CLASSIFICATION_PL,
1482
  DATA_PAIR_CLASSIFICATION_ZH,
1483
  DATA_RERANKING,
1484
+ DATA_RERANKING_FR,
1485
  DATA_RERANKING_ZH,
1486
  DATA_RETRIEVAL,
1487
+ DATA_RETRIEVAL_FR,
1488
  DATA_RETRIEVAL_PL,
1489
  DATA_RETRIEVAL_ZH,
1490
  DATA_STS_EN,
1491
+ DATA_STS_FR,
1492
  DATA_STS_PL,
1493
  DATA_STS_ZH,
1494
  DATA_STS_OTHER,
1495
  DATA_SUMMARIZATION,
1496
+ DATA_SUMMARIZATION_FR,
1497
  ]:
1498
  # NUM_SCORES += d.iloc[:, 1:].apply(lambda x: sum([1 for y in x if isinstance(y, float) and not np.isnan(y)]), axis=1).sum()
1499
  cols_to_ignore = 3 if "Average" in d.columns else 2
 
1568
  )
1569
  with gr.Row():
1570
  data_run_overall_zh = gr.Button("Refresh")
1571
+ data_run_overall_zh.click(get_mteb_average_zh, inputs=None, outputs=data_overall_zh)
1572
# Overall French leaderboard tab: intro text, results table and a refresh button.
with gr.TabItem("French"):
    with gr.Row():
        gr.Markdown("""
        **Overall MTEB French leaderboard (F-MTEB)** 🔮🇫🇷

        - **Metric:** Various, refer to task tabs
        - **Languages:** French
        - **Credits:** [Lyon-NLP](https://github.com/Lyon-NLP): [Gabriel Sequeira](https://github.com/GabrielSequeira), [Imene Kerboua](https://github.com/imenelydiaker), [wissam-sib](https://github.com/wissam-sib), [Sunalwing](https://github.com/Sunalwing)
        """)
    with gr.Row():
        data_overall_fr = gr.components.Dataframe(
            DATA_OVERALL_FR,
            datatype=["number", "markdown"] + ["number"] * len(DATA_OVERALL_FR.columns),
            type="pandas",
            height=600,
        )
    with gr.Row():
        # The button gets its own name: binding it to `data_overall_fr` would
        # replace the table reference, and the click handler would then send
        # the refreshed data to the button instead of the table.
        data_run_overall_fr = gr.Button("Refresh")
        data_run_overall_fr.click(get_mteb_average_fr, inputs=None, outputs=data_overall_fr)
1591
  with gr.TabItem("Polish"):
1592
  with gr.Row():
1593
  gr.Markdown("""
 
1712
  partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_DA),
1713
  outputs=data_run_classification_da,
1714
  )
1715
# French classification tab: intro text, results table and a refresh button.
with gr.TabItem("French"):
    with gr.Row():
        gr.Markdown("""
        **Classification French Leaderboard** 💙🇫🇷

        - **Metric:** [Accuracy](https://huggingface.co/spaces/evaluate-metric/accuracy)
        - **Languages:** French
        - **Credits:**
        """)
    with gr.Row():
        data_classification_fr = gr.components.Dataframe(
            DATA_CLASSIFICATION_FR,
            datatype=["number", "markdown"] + ["number"] * len(DATA_CLASSIFICATION_FR.columns),
            type="pandas",
        )
    with gr.Row():
        data_run_classification_fr = gr.Button("Refresh")
        data_run_classification_fr.click(
            partial(get_mteb_data, tasks=["Classification"], datasets=TASK_LIST_CLASSIFICATION_FR),
            # Send the refreshed data to the table, not back to the button.
            outputs=data_classification_fr,
        )
1736
  with gr.TabItem("Norwegian"):
1737
  with gr.Row():
1738
  gr.Markdown("""
 
1858
  partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_ZH),
1859
  outputs=data_clustering_zh,
1860
  )
1861
# French clustering tab: intro text, results table and a refresh button.
with gr.TabItem("French"):
    with gr.Row():
        gr.Markdown("""
        **Clustering French Leaderboard** ✨🇫🇷

        - **Metric:** Validity Measure (v_measure)
        - **Languages:** French
        - **Credits:**
        """)
    with gr.Row():
        data_clustering_fr = gr.components.Dataframe(
            DATA_CLUSTERING_FR,
            # First column numeric, second rendered as markdown, the rest numeric.
            datatype=["number", "markdown", *(["number"] * len(DATA_CLUSTERING_FR.columns))],
            type="pandas",
        )
    with gr.Row():
        data_run_clustering_fr = gr.Button("Refresh")
        # Refreshing re-fetches the French clustering results into the table above.
        data_run_clustering_fr.click(
            fn=partial(get_mteb_data, tasks=["Clustering"], datasets=TASK_LIST_CLUSTERING_FR),
            outputs=data_clustering_fr,
        )
1882
  with gr.TabItem("German"):
1883
  with gr.Row():
1884
  gr.Markdown("""
 
1963
  partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_ZH),
1964
  outputs=data_pair_classification_zh,
1965
  )
1966
# French pair-classification tab: intro text, results table and a refresh button.
with gr.TabItem("French"):
    with gr.Row():
        gr.Markdown("""
        **Pair Classification French Leaderboard** 🎭🇫🇷

        - **Metric:** Average Precision based on Cosine Similarities (cos_sim_ap)
        - **Languages:** French
        - **Credits:**
        """)
    with gr.Row():
        data_pair_classification_fr = gr.components.Dataframe(
            DATA_PAIR_CLASSIFICATION_FR,
            # First column numeric, second rendered as markdown, the rest numeric.
            datatype=["number", "markdown", *(["number"] * len(DATA_PAIR_CLASSIFICATION_FR.columns))],
            type="pandas",
        )
    with gr.Row():
        data_run_pair_classification_fr = gr.Button("Refresh")
        # Refreshing re-fetches the French pair-classification results into the table above.
        data_run_pair_classification_fr.click(
            fn=partial(get_mteb_data, tasks=["PairClassification"], datasets=TASK_LIST_PAIR_CLASSIFICATION_FR),
            outputs=data_pair_classification_fr,
        )
1987
  with gr.TabItem("Polish"):
1988
  with gr.Row():
1989
  gr.Markdown("""
 
2047
  partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_ZH),
2048
  outputs=data_reranking_zh,
2049
  )
2050
# French reranking tab: intro text, results table and a refresh button.
with gr.TabItem("French"):
    with gr.Row():
        gr.Markdown("""
        **Reranking French Leaderboard** 🥈🇫🇷

        - **Metric:** Mean Average Precision (MAP)
        - **Languages:** French
        - **Credits:**
        """)
    with gr.Row():
        data_reranking_fr = gr.components.Dataframe(
            DATA_RERANKING_FR,
            # First column numeric, second rendered as markdown, the rest numeric.
            datatype=["number", "markdown", *(["number"] * len(DATA_RERANKING_FR.columns))],
            type="pandas",
        )
    with gr.Row():
        data_run_reranking_fr = gr.Button("Refresh")
        # Refreshing re-fetches the French reranking results into the table above.
        data_run_reranking_fr.click(
            fn=partial(get_mteb_data, tasks=["Reranking"], datasets=TASK_LIST_RERANKING_FR),
            outputs=data_reranking_fr,
        )
2071
  with gr.TabItem("Retrieval"):
2072
  with gr.TabItem("English"):
2073
  with gr.Row():
 
2100
  - **Credits:** [FlagEmbedding](https://github.com/FlagOpen/FlagEmbedding)
2101
  """)
2102
# Table and refresh button of the tab credited to FlagEmbedding above; they
# use the Chinese retrieval data and their own component names, so they do
# not share names or data with the French retrieval tab that follows.
with gr.Row():
    data_retrieval_zh = gr.components.Dataframe(
        DATA_RETRIEVAL_ZH,
        # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
        datatype=["number", "markdown"] + ["number"] * len(DATA_RETRIEVAL_ZH.columns) * 2,
        type="pandas",
    )
with gr.Row():
    data_run_retrieval_zh = gr.Button("Refresh")
    data_run_retrieval_zh.click(
        partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_ZH),
        outputs=data_retrieval_zh,
    )
2115
# French retrieval tab: intro text, results table and a refresh button.
with gr.TabItem("French"):
    with gr.Row():
        gr.Markdown("""
        **Retrieval French Leaderboard** 🔎🇫🇷

        - **Metric:** Normalized Discounted Cumulative Gain @ k (ndcg_at_10)
        - **Languages:** French
        - **Credits:**
        """)
    with gr.Row():
        data_retrieval_fr = gr.components.Dataframe(
            DATA_RETRIEVAL_FR,
            # Add support for more columns than existing as a buffer for CQADupstack & other Retrieval tasks (e.g. MSMARCOv2)
            datatype=["number", "markdown", *(["number"] * len(DATA_RETRIEVAL_FR.columns) * 2)],
            type="pandas",
        )
    with gr.Row():
        data_run_retrieval_fr = gr.Button("Refresh")
        # Refreshing re-fetches the French retrieval results into the table above.
        data_run_retrieval_fr.click(
            fn=partial(get_mteb_data, tasks=["Retrieval"], datasets=TASK_LIST_RETRIEVAL_FR),
            outputs=data_retrieval_fr,
        )
2137
  with gr.TabItem("Polish"):
2138
  with gr.Row():
2139
  gr.Markdown("""
 
2198
  partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_ZH),
2199
  outputs=data_sts_zh,
2200
  )
2201
# French STS tab: intro text, results table and a refresh button.
with gr.TabItem("French"):
    with gr.Row():
        gr.Markdown("""
        **STS French Leaderboard** 🤖🇫🇷

        - **Metric:** Spearman correlation based on cosine similarity
        - **Languages:** French
        - **Credits:**
        """)
    with gr.Row():
        data_sts_fr = gr.components.Dataframe(
            DATA_STS_FR,
            # First column numeric, second rendered as markdown, the rest numeric.
            datatype=["number", "markdown", *(["number"] * len(DATA_STS_FR.columns))],
            type="pandas",
        )
    with gr.Row():
        data_run_sts_fr = gr.Button("Refresh")
        # Refreshing re-fetches the French STS results into the table above.
        data_run_sts_fr.click(
            fn=partial(get_mteb_data, tasks=["STS"], datasets=TASK_LIST_STS_FR),
            outputs=data_sts_fr,
        )
2222
  with gr.TabItem("Polish"):
2223
  with gr.Row():
2224
  gr.Markdown("""