|
--- |
|
library_name: sentence-transformers |
|
pipeline_tag: sentence-similarity |
|
tags: |
|
- sentence-transformers |
|
- feature-extraction |
|
- sentence-similarity |
|
- transformers |
|
- sentence-embedding |
|
- mteb |
|
model-index: |
|
- name: bilingual-document-embedding |
|
results: |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloProfClusteringP2P |
|
config: default |
|
split: test |
|
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b |
|
metrics: |
|
- type: v_measure |
|
value: 59.15114729440782 |
|
- type: v_measures |
|
value: [0.6088344883526443, 0.5973965747446686, 0.5447756077407359, 0.5758855880313155, 0.5557001687682872] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloProfClusteringS2S |
|
config: default |
|
split: test |
|
revision: 392ba3f5bcc8c51f578786c1fc3dae648662cb9b |
|
metrics: |
|
- type: v_measure |
|
value: 41.577823072264664 |
|
- type: v_measures |
|
value: [0.38879769314999035, 0.4006045672100778, 0.3958716052074101, 0.4405039656967712, 0.47468521453925405] |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: lyon-nlp/mteb-fr-reranking-alloprof-s2p |
|
name: MTEB AlloprofReranking |
|
config: default |
|
split: test |
|
revision: 65393d0d7a08a10b4e348135e824f385d420b0fd |
|
metrics: |
|
- type: map |
|
value: 73.87182436982245 |
|
- type: mrr |
|
value: 75.16911341393207 |
|
- type: nAUC_map_diff1 |
|
value: 57.405401360219784 |
|
- type: nAUC_map_max |
|
value: 23.453753045677463 |
|
- type: nAUC_mrr_diff1 |
|
value: 56.36974368790562 |
|
- type: nAUC_mrr_max |
|
value: 24.630226324027316 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: lyon-nlp/alloprof |
|
name: MTEB AlloprofRetrieval |
|
config: default |
|
split: test |
|
revision: fcf295ea64c750f41fadbaa37b9b861558e1bfbd |
|
metrics: |
|
- type: map_at_1 |
|
value: 32.513 |
|
- type: map_at_10 |
|
value: 43.175999999999995 |
|
- type: map_at_100 |
|
value: 44.062 |
|
- type: map_at_1000 |
|
value: 44.115 |
|
- type: map_at_20 |
|
value: 43.702999999999996 |
|
- type: map_at_3 |
|
value: 40.205999999999996 |
|
- type: map_at_5 |
|
value: 41.978 |
|
- type: mrr_at_1 |
|
value: 32.512953367875646 |
|
- type: mrr_at_10 |
|
value: 43.175871096855616 |
|
- type: mrr_at_100 |
|
value: 44.06232913339137 |
|
- type: mrr_at_1000 |
|
value: 44.11462846644048 |
|
- type: mrr_at_20 |
|
value: 43.70270697751798 |
|
- type: mrr_at_3 |
|
value: 40.20581462291314 |
|
- type: mrr_at_5 |
|
value: 41.978267127230986 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 41.780712093626434 |
|
- type: nauc_map_at_1000_max |
|
value: 37.56496014685421 |
|
- type: nauc_map_at_100_diff1 |
|
value: 41.783476025622775 |
|
- type: nauc_map_at_100_max |
|
value: 37.60097300537378 |
|
- type: nauc_map_at_10_diff1 |
|
value: 41.61653902439649 |
|
- type: nauc_map_at_10_max |
|
value: 37.33970786907192 |
|
- type: nauc_map_at_1_diff1 |
|
value: 44.90184716266261 |
|
- type: nauc_map_at_1_max |
|
value: 34.452525158255284 |
|
- type: nauc_map_at_20_diff1 |
|
value: 41.6589893917753 |
|
- type: nauc_map_at_20_max |
|
value: 37.58641485307153 |
|
- type: nauc_map_at_3_diff1 |
|
value: 42.104788108051075 |
|
- type: nauc_map_at_3_max |
|
value: 36.5928644326236 |
|
- type: nauc_map_at_5_diff1 |
|
value: 41.607739702876565 |
|
- type: nauc_map_at_5_max |
|
value: 36.907229583593825 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 41.780712093626434 |
|
- type: nauc_mrr_at_1000_max |
|
value: 37.56496014685421 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 41.783476025622775 |
|
- type: nauc_mrr_at_100_max |
|
value: 37.60097300537378 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 41.61653902439649 |
|
- type: nauc_mrr_at_10_max |
|
value: 37.33970786907192 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 44.90184716266261 |
|
- type: nauc_mrr_at_1_max |
|
value: 34.452525158255284 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 41.6589893917753 |
|
- type: nauc_mrr_at_20_max |
|
value: 37.58641485307153 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 42.104788108051075 |
|
- type: nauc_mrr_at_3_max |
|
value: 36.5928644326236 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 41.607739702876565 |
|
- type: nauc_mrr_at_5_max |
|
value: 36.907229583593825 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 41.28546857310532 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 39.086823074137 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 41.25161168648205 |
|
- type: nauc_ndcg_at_100_max |
|
value: 40.22844726831379 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 40.33705639032033 |
|
- type: nauc_ndcg_at_10_max |
|
value: 39.1320635099517 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 44.90184716266261 |
|
- type: nauc_ndcg_at_1_max |
|
value: 34.452525158255284 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 40.40784583920326 |
|
- type: nauc_ndcg_at_20_max |
|
value: 40.069552678695416 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 41.30895891523514 |
|
- type: nauc_ndcg_at_3_max |
|
value: 37.414699073823584 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 40.36028401033484 |
|
- type: nauc_ndcg_at_5_max |
|
value: 37.97523651073113 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: 46.82456733521383 |
|
- type: nauc_precision_at_1000_max |
|
value: 85.71400945217201 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 40.52716981002009 |
|
- type: nauc_precision_at_100_max |
|
value: 65.51987173508483 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 35.67963463267156 |
|
- type: nauc_precision_at_10_max |
|
value: 46.155216936968856 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 44.90184716266261 |
|
- type: nauc_precision_at_1_max |
|
value: 34.452525158255284 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 34.94608063839023 |
|
- type: nauc_precision_at_20_max |
|
value: 52.447339810747174 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 39.0332348086419 |
|
- type: nauc_precision_at_3_max |
|
value: 39.83919369547502 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 36.38511756252038 |
|
- type: nauc_precision_at_5_max |
|
value: 41.375729851686486 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 46.824567335213885 |
|
- type: nauc_recall_at_1000_max |
|
value: 85.71400945217061 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 40.52716981002009 |
|
- type: nauc_recall_at_100_max |
|
value: 65.51987173508483 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 35.67963463267154 |
|
- type: nauc_recall_at_10_max |
|
value: 46.15521693696879 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 44.90184716266261 |
|
- type: nauc_recall_at_1_max |
|
value: 34.452525158255284 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 34.94608063839018 |
|
- type: nauc_recall_at_20_max |
|
value: 52.44733981074723 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 39.033234808641886 |
|
- type: nauc_recall_at_3_max |
|
value: 39.83919369547505 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 36.38511756252039 |
|
- type: nauc_recall_at_5_max |
|
value: 41.37572985168646 |
|
- type: ndcg_at_1 |
|
value: 32.513 |
|
- type: ndcg_at_10 |
|
value: 48.796 |
|
- type: ndcg_at_100 |
|
value: 53.273 |
|
- type: ndcg_at_1000 |
|
value: 54.686 |
|
- type: ndcg_at_20 |
|
value: 50.702000000000005 |
|
- type: ndcg_at_3 |
|
value: 42.721 |
|
- type: ndcg_at_5 |
|
value: 45.9 |
|
- type: precision_at_1 |
|
value: 32.513 |
|
- type: precision_at_10 |
|
value: 6.662 |
|
- type: precision_at_100 |
|
value: 0.88 |
|
- type: precision_at_1000 |
|
value: 0.099 |
|
- type: precision_at_20 |
|
value: 3.707 |
|
- type: precision_at_3 |
|
value: 16.667 |
|
- type: precision_at_5 |
|
value: 11.537 |
|
- type: recall_at_1 |
|
value: 32.513 |
|
- type: recall_at_10 |
|
value: 66.623 |
|
- type: recall_at_100 |
|
value: 87.953 |
|
- type: recall_at_1000 |
|
value: 99.136 |
|
- type: recall_at_20 |
|
value: 74.136 |
|
- type: recall_at_3 |
|
value: 50.0 |
|
- type: recall_at_5 |
|
value: 57.68600000000001 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_reviews_multi |
|
name: MTEB AmazonReviewsClassification (fr) |
|
config: fr |
|
split: test |
|
revision: 1399c76144fd37290681b995c656ef9b2e06e26d |
|
metrics: |
|
- type: accuracy |
|
value: 43.48599999999999 |
|
- type: f1 |
|
value: 41.52411498679777 |
|
- type: f1_weighted |
|
value: 41.524114986797784 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: maastrichtlawtech/bsard |
|
name: MTEB BSARDRetrieval |
|
config: default |
|
split: test |
|
revision: 5effa1b9b5fa3b0f9e12523e6e43e5f86a6e6d59 |
|
metrics: |
|
- type: map_at_1 |
|
value: 7.6579999999999995 |
|
- type: map_at_10 |
|
value: 11.706 |
|
- type: map_at_100 |
|
value: 12.948 |
|
- type: map_at_1000 |
|
value: 13.062000000000001 |
|
- type: map_at_20 |
|
value: 12.342 |
|
- type: map_at_3 |
|
value: 9.76 |
|
- type: map_at_5 |
|
value: 10.683 |
|
- type: mrr_at_1 |
|
value: 7.657657657657657 |
|
- type: mrr_at_10 |
|
value: 11.706170456170454 |
|
- type: mrr_at_100 |
|
value: 12.947870943304876 |
|
- type: mrr_at_1000 |
|
value: 13.06189894235417 |
|
- type: mrr_at_20 |
|
value: 12.342441460088518 |
|
- type: mrr_at_3 |
|
value: 9.75975975975976 |
|
- type: mrr_at_5 |
|
value: 10.683183183183182 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 1.8612167614294672 |
|
- type: nauc_map_at_1000_max |
|
value: 1.586408397411475 |
|
- type: nauc_map_at_100_diff1 |
|
value: 1.9210527499420051 |
|
- type: nauc_map_at_100_max |
|
value: 1.6875048679362115 |
|
- type: nauc_map_at_10_diff1 |
|
value: 1.5262203122571876 |
|
- type: nauc_map_at_10_max |
|
value: 1.2218645063194111 |
|
- type: nauc_map_at_1_diff1 |
|
value: -2.9465131557421675 |
|
- type: nauc_map_at_1_max |
|
value: -1.8953694303529376 |
|
- type: nauc_map_at_20_diff1 |
|
value: 1.8980212905989484 |
|
- type: nauc_map_at_20_max |
|
value: 1.1460175145386533 |
|
- type: nauc_map_at_3_diff1 |
|
value: 1.8300766192776097 |
|
- type: nauc_map_at_3_max |
|
value: -1.8234185626649715 |
|
- type: nauc_map_at_5_diff1 |
|
value: 1.9632596967629419 |
|
- type: nauc_map_at_5_max |
|
value: -1.0055447005584437 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 1.8612167614294672 |
|
- type: nauc_mrr_at_1000_max |
|
value: 1.586408397411475 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 1.9210527499420051 |
|
- type: nauc_mrr_at_100_max |
|
value: 1.6875048679362115 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 1.5262203122571876 |
|
- type: nauc_mrr_at_10_max |
|
value: 1.2218645063194111 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: -2.9465131557421675 |
|
- type: nauc_mrr_at_1_max |
|
value: -1.8953694303529376 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 1.8980212905989484 |
|
- type: nauc_mrr_at_20_max |
|
value: 1.1460175145386533 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 1.8300766192776097 |
|
- type: nauc_mrr_at_3_max |
|
value: -1.8234185626649715 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 1.9632596967629419 |
|
- type: nauc_mrr_at_5_max |
|
value: -1.0055447005584437 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 2.886203742022491 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 5.34835634942694 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 3.5183102844141443 |
|
- type: nauc_ndcg_at_100_max |
|
value: 7.479214430443089 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 1.8765330789166412 |
|
- type: nauc_ndcg_at_10_max |
|
value: 4.450826426093314 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: -2.9465131557421675 |
|
- type: nauc_ndcg_at_1_max |
|
value: -1.8953694303529376 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 2.7436758637624705 |
|
- type: nauc_ndcg_at_20_max |
|
value: 3.713084041742973 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 2.84641225972613 |
|
- type: nauc_ndcg_at_3_max |
|
value: -1.6797424196225121 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 3.0652301308463192 |
|
- type: nauc_ndcg_at_5_max |
|
value: -0.2333717294229873 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: 6.795054101803062 |
|
- type: nauc_precision_at_1000_max |
|
value: 17.30874103781348 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 7.527516539082746 |
|
- type: nauc_precision_at_100_max |
|
value: 22.74844672263555 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 2.0761049260177407 |
|
- type: nauc_precision_at_10_max |
|
value: 11.360023168126489 |
|
- type: nauc_precision_at_1_diff1 |
|
value: -2.9465131557421675 |
|
- type: nauc_precision_at_1_max |
|
value: -1.8953694303529376 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 3.923410121079766 |
|
- type: nauc_precision_at_20_max |
|
value: 8.296820719888059 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 5.1715442640514215 |
|
- type: nauc_precision_at_3_max |
|
value: -1.3116999013605417 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 5.331309068134777 |
|
- type: nauc_precision_at_5_max |
|
value: 1.5638174487988539 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 6.795054101803136 |
|
- type: nauc_recall_at_1000_max |
|
value: 17.308741037813558 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 7.527516539082746 |
|
- type: nauc_recall_at_100_max |
|
value: 22.748446722635553 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 2.076104926017711 |
|
- type: nauc_recall_at_10_max |
|
value: 11.360023168126451 |
|
- type: nauc_recall_at_1_diff1 |
|
value: -2.9465131557421675 |
|
- type: nauc_recall_at_1_max |
|
value: -1.8953694303529376 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 3.9234101210797143 |
|
- type: nauc_recall_at_20_max |
|
value: 8.296820719888002 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 5.17154426405143 |
|
- type: nauc_recall_at_3_max |
|
value: -1.311699901360526 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 5.331309068134779 |
|
- type: nauc_recall_at_5_max |
|
value: 1.5638174487988667 |
|
- type: ndcg_at_1 |
|
value: 7.6579999999999995 |
|
- type: ndcg_at_10 |
|
value: 14.633 |
|
- type: ndcg_at_100 |
|
value: 21.199 |
|
- type: ndcg_at_1000 |
|
value: 24.505 |
|
- type: ndcg_at_20 |
|
value: 16.849 |
|
- type: ndcg_at_3 |
|
value: 10.488999999999999 |
|
- type: ndcg_at_5 |
|
value: 12.156 |
|
- type: precision_at_1 |
|
value: 7.6579999999999995 |
|
- type: precision_at_10 |
|
value: 2.432 |
|
- type: precision_at_100 |
|
value: 0.563 |
|
- type: precision_at_1000 |
|
value: 0.083 |
|
- type: precision_at_20 |
|
value: 1.644 |
|
- type: precision_at_3 |
|
value: 4.204 |
|
- type: precision_at_5 |
|
value: 3.3329999999999997 |
|
- type: recall_at_1 |
|
value: 7.6579999999999995 |
|
- type: recall_at_10 |
|
value: 24.324 |
|
- type: recall_at_100 |
|
value: 56.306 |
|
- type: recall_at_1000 |
|
value: 82.883 |
|
- type: recall_at_20 |
|
value: 32.883 |
|
- type: recall_at_3 |
|
value: 12.613 |
|
- type: recall_at_5 |
|
value: 16.667 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: lyon-nlp/clustering-hal-s2s |
|
name: MTEB HALClusteringS2S |
|
config: default |
|
split: test |
|
revision: e06ebbbb123f8144bef1a5d18796f3dec9ae2915 |
|
metrics: |
|
- type: v_measure |
|
value: 24.87943546753088 |
|
- type: v_measures |
|
value: [0.278272502518604, 0.25921772339921395, 0.2641171251066139, 0.2663752999094091, 0.23649418885985485] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: reciTAL/mlsum |
|
name: MTEB MLSUMClusteringP2P |
|
config: fr |
|
split: test |
|
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 |
|
metrics: |
|
- type: v_measure |
|
value: 44.175558680182796 |
|
- type: v_measures |
|
value: [0.44382305997614757, 0.45849638769110745, 0.45186964282579195, 0.44407241104469836, 0.39488181195438643] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: reciTAL/mlsum |
|
name: MTEB MLSUMClusteringS2S |
|
config: fr |
|
split: test |
|
revision: b5d54f8f3b61ae17845046286940f03c6bc79bc7 |
|
metrics: |
|
- type: v_measure |
|
value: 44.649498161719784 |
|
- type: v_measures |
|
value: [0.44293968862639355, 0.45358259404927, 0.4544509656034716, 0.4464804623311193, 0.400104460457011] |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/mtop_domain |
|
name: MTEB MTOPDomainClassification (fr) |
|
config: fr |
|
split: test |
|
revision: d80d48c1eb48d3562165c59d59d0034df9fff0bf |
|
metrics: |
|
- type: accuracy |
|
value: 88.46226119636705 |
|
- type: f1 |
|
value: 88.37561423387648 |
|
- type: f1_weighted |
|
value: 88.38817570958008 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/mtop_intent |
|
name: MTEB MTOPIntentClassification (fr) |
|
config: fr |
|
split: test |
|
revision: ae001d0e6b1228650b7bd1c2c65fb50ad11a8aba |
|
metrics: |
|
- type: accuracy |
|
value: 60.27247103037895 |
|
- type: f1 |
|
value: 43.36800798113768 |
|
- type: f1_weighted |
|
value: 62.65127593999621 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/masakhanews |
|
name: MTEB MasakhaNEWSClassification (fra) |
|
config: fra |
|
split: test |
|
revision: 18193f187b92da67168c655c9973a165ed9593dd |
|
metrics: |
|
- type: accuracy |
|
value: 78.0094786729858 |
|
- type: f1 |
|
value: 74.34441973526405 |
|
- type: f1_weighted |
|
value: 78.20439089386724 |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: masakhane/masakhanews |
|
name: MTEB MasakhaNEWSClusteringP2P (fra) |
|
config: fra |
|
split: test |
|
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 |
|
metrics: |
|
- type: v_measure |
|
value: 69.98688791220088 |
|
- type: v_measures |
|
value: [1.0, 0.11908920120641955, 0.7679216739314454, 0.8367645040119921, 0.7755690164601873] |
|
- task: |
|
type: Clustering |
|
dataset: |
|
type: masakhane/masakhanews |
|
name: MTEB MasakhaNEWSClusteringS2S (fra) |
|
config: fra |
|
split: test |
|
revision: 8ccc72e69e65f40c70e117d8b3c08306bb788b60 |
|
metrics: |
|
- type: v_measure |
|
value: 36.65801636831311 |
|
- type: v_measures |
|
value: [1.0, 0.017508140483218165, 0.4153261241535689, 0.21327667744326673, 0.18678987633560207] |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_intent |
|
name: MTEB MassiveIntentClassification (fr) |
|
config: fr |
|
split: test |
|
revision: 4672e20407010da34463acc759c162ca9734bca6 |
|
metrics: |
|
- type: accuracy |
|
value: 67.39408204438466 |
|
- type: f1 |
|
value: 65.4548720535735 |
|
- type: f1_weighted |
|
value: 66.30814406163043 |
|
- task: |
|
type: Classification |
|
dataset: |
|
type: mteb/amazon_massive_scenario |
|
name: MTEB MassiveScenarioClassification (fr) |
|
config: fr |
|
split: test |
|
revision: fad2c6e8459f9e1c45d9315f4953d921437d70f8 |
|
metrics: |
|
- type: accuracy |
|
value: 74.12239408204438 |
|
- type: f1 |
|
value: 73.59473076543576 |
|
- type: f1_weighted |
|
value: 73.70038947164628 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: jinaai/mintakaqa |
|
name: MTEB MintakaRetrieval (fr) |
|
config: fr |
|
split: test |
|
revision: efa78cc2f74bbcd21eff2261f9e13aebe40b814e |
|
metrics: |
|
- type: map_at_1 |
|
value: 13.062999999999999 |
|
- type: map_at_10 |
|
value: 20.28 |
|
- type: map_at_100 |
|
value: 21.271 |
|
- type: map_at_1000 |
|
value: 21.384 |
|
- type: map_at_20 |
|
value: 20.822 |
|
- type: map_at_3 |
|
value: 18.195 |
|
- type: map_at_5 |
|
value: 19.293 |
|
- type: mrr_at_1 |
|
value: 13.063063063063062 |
|
- type: mrr_at_10 |
|
value: 20.280426530426514 |
|
- type: mrr_at_100 |
|
value: 21.27068193401496 |
|
- type: mrr_at_1000 |
|
value: 21.38419408143633 |
|
- type: mrr_at_20 |
|
value: 20.821814700930886 |
|
- type: mrr_at_3 |
|
value: 18.19546819546817 |
|
- type: mrr_at_5 |
|
value: 19.292929292929255 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 19.722064275802 |
|
- type: nauc_map_at_1000_max |
|
value: 31.384556319729395 |
|
- type: nauc_map_at_100_diff1 |
|
value: 19.71033532515874 |
|
- type: nauc_map_at_100_max |
|
value: 31.402828787179143 |
|
- type: nauc_map_at_10_diff1 |
|
value: 20.001133718713536 |
|
- type: nauc_map_at_10_max |
|
value: 31.806014785924102 |
|
- type: nauc_map_at_1_diff1 |
|
value: 29.69565140559411 |
|
- type: nauc_map_at_1_max |
|
value: 30.683216020589533 |
|
- type: nauc_map_at_20_diff1 |
|
value: 19.821797788715696 |
|
- type: nauc_map_at_20_max |
|
value: 31.59711268659909 |
|
- type: nauc_map_at_3_diff1 |
|
value: 21.615605640070964 |
|
- type: nauc_map_at_3_max |
|
value: 31.966650937266305 |
|
- type: nauc_map_at_5_diff1 |
|
value: 20.505641463837247 |
|
- type: nauc_map_at_5_max |
|
value: 31.950326449610333 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 19.722064275802 |
|
- type: nauc_mrr_at_1000_max |
|
value: 31.384556319729395 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 19.71033532515874 |
|
- type: nauc_mrr_at_100_max |
|
value: 31.402828787179143 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 20.001133718713536 |
|
- type: nauc_mrr_at_10_max |
|
value: 31.806014785924102 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 29.69565140559411 |
|
- type: nauc_mrr_at_1_max |
|
value: 30.683216020589533 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 19.821797788715696 |
|
- type: nauc_mrr_at_20_max |
|
value: 31.59711268659909 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 21.615605640070964 |
|
- type: nauc_mrr_at_3_max |
|
value: 31.966650937266305 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 20.505641463837247 |
|
- type: nauc_mrr_at_5_max |
|
value: 31.950326449610333 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 16.644876374984612 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 30.00552722677877 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 15.707910701262051 |
|
- type: nauc_ndcg_at_100_max |
|
value: 29.581303411340663 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 16.76054369006531 |
|
- type: nauc_ndcg_at_10_max |
|
value: 31.603443500691675 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 29.69565140559411 |
|
- type: nauc_ndcg_at_1_max |
|
value: 30.683216020589533 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 16.269251917194648 |
|
- type: nauc_ndcg_at_20_max |
|
value: 30.935281233489686 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 19.672433215100494 |
|
- type: nauc_ndcg_at_3_max |
|
value: 32.07848616783397 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 17.88855855774221 |
|
- type: nauc_ndcg_at_5_max |
|
value: 32.01468420337384 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: -2.7987428835555157 |
|
- type: nauc_precision_at_1000_max |
|
value: 13.9766188144417 |
|
- type: nauc_precision_at_100_diff1 |
|
value: 3.9597929189458183 |
|
- type: nauc_precision_at_100_max |
|
value: 21.581900275188854 |
|
- type: nauc_precision_at_10_diff1 |
|
value: 9.174898767869335 |
|
- type: nauc_precision_at_10_max |
|
value: 30.88927862766609 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 29.69565140559411 |
|
- type: nauc_precision_at_1_max |
|
value: 30.683216020589533 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 7.774469550439256 |
|
- type: nauc_precision_at_20_max |
|
value: 28.801273985757952 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 15.058108507548344 |
|
- type: nauc_precision_at_3_max |
|
value: 32.28970787769507 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 11.81883670502361 |
|
- type: nauc_precision_at_5_max |
|
value: 32.08267698057494 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: -2.7987428835554455 |
|
- type: nauc_recall_at_1000_max |
|
value: 13.976618814441693 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 3.9597929189458183 |
|
- type: nauc_recall_at_100_max |
|
value: 21.581900275188858 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 9.17489876786934 |
|
- type: nauc_recall_at_10_max |
|
value: 30.889278627666112 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 29.69565140559411 |
|
- type: nauc_recall_at_1_max |
|
value: 30.683216020589533 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 7.774469550439271 |
|
- type: nauc_recall_at_20_max |
|
value: 28.80127398575797 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 15.058108507548361 |
|
- type: nauc_recall_at_3_max |
|
value: 32.2897078776951 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 11.818836705023593 |
|
- type: nauc_recall_at_5_max |
|
value: 32.08267698057489 |
|
- type: ndcg_at_1 |
|
value: 13.062999999999999 |
|
- type: ndcg_at_10 |
|
value: 24.166 |
|
- type: ndcg_at_100 |
|
value: 29.48 |
|
- type: ndcg_at_1000 |
|
value: 33.236 |
|
- type: ndcg_at_20 |
|
value: 26.168999999999997 |
|
- type: ndcg_at_3 |
|
value: 19.796 |
|
- type: ndcg_at_5 |
|
value: 21.762999999999998 |
|
- type: precision_at_1 |
|
value: 13.062999999999999 |
|
- type: precision_at_10 |
|
value: 3.6609999999999996 |
|
- type: precision_at_100 |
|
value: 0.626 |
|
- type: precision_at_1000 |
|
value: 0.094 |
|
- type: precision_at_20 |
|
value: 2.23 |
|
- type: precision_at_3 |
|
value: 8.135 |
|
- type: precision_at_5 |
|
value: 5.831 |
|
- type: recall_at_1 |
|
value: 13.062999999999999 |
|
- type: recall_at_10 |
|
value: 36.609 |
|
- type: recall_at_100 |
|
value: 62.572 |
|
- type: recall_at_1000 |
|
value: 93.735 |
|
- type: recall_at_20 |
|
value: 44.595 |
|
- type: recall_at_3 |
|
value: 24.406 |
|
- type: recall_at_5 |
|
value: 29.156 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: GEM/opusparcus |
|
name: MTEB OpusparcusPC (fr) |
|
config: fr |
|
split: test |
|
revision: 9e9b1f8ef51616073f47f306f7f47dd91663f86a |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 81.94822888283378 |
|
- type: cos_sim_accuracy_threshold |
|
value: 59.67133641242981 |
|
- type: cos_sim_ap |
|
value: 93.77568000367297 |
|
- type: cos_sim_f1 |
|
value: 87.33944954128441 |
|
- type: cos_sim_f1_threshold |
|
value: 48.620444536209106 |
|
- type: cos_sim_precision |
|
value: 81.15942028985508 |
|
- type: cos_sim_recall |
|
value: 94.5382323733863 |
|
- type: dot_accuracy |
|
value: 81.94822888283378 |
|
- type: dot_accuracy_threshold |
|
value: 59.67133045196533 |
|
- type: dot_ap |
|
value: 93.77568000367297 |
|
- type: dot_f1 |
|
value: 87.33944954128441 |
|
- type: dot_f1_threshold |
|
value: 48.620444536209106 |
|
- type: dot_precision |
|
value: 81.15942028985508 |
|
- type: dot_recall |
|
value: 94.5382323733863 |
|
- type: euclidean_accuracy |
|
value: 81.94822888283378 |
|
- type: euclidean_accuracy_threshold |
|
value: 89.80941772460938 |
|
- type: euclidean_ap |
|
value: 93.77568000367297 |
|
- type: euclidean_f1 |
|
value: 87.33944954128441 |
|
- type: euclidean_f1_threshold |
|
value: 101.37012004852295 |
|
- type: euclidean_precision |
|
value: 81.15942028985508 |
|
- type: euclidean_recall |
|
value: 94.5382323733863 |
|
- type: manhattan_accuracy |
|
value: 81.94822888283378 |
|
- type: manhattan_accuracy_threshold |
|
value: 2278.3992767333984 |
|
- type: manhattan_ap |
|
value: 93.736221809257 |
|
- type: manhattan_f1 |
|
value: 87.24319159101768 |
|
- type: manhattan_f1_threshold |
|
value: 2442.0352935791016 |
|
- type: manhattan_precision |
|
value: 84.06998158379374 |
|
- type: manhattan_recall |
|
value: 90.66534260178749 |
|
- type: max_accuracy |
|
value: 81.94822888283378 |
|
- type: max_ap |
|
value: 93.77568000367297 |
|
- type: max_f1 |
|
value: 87.33944954128441 |
|
- task: |
|
type: PairClassification |
|
dataset: |
|
type: google-research-datasets/paws-x |
|
name: MTEB PawsX (fr) |
|
config: fr |
|
split: test |
|
revision: 8a04d940a42cd40658986fdd8e3da561533a3646 |
|
metrics: |
|
- type: cos_sim_accuracy |
|
value: 63.0 |
|
- type: cos_sim_ap |
|
value: 62.8421811357794 |
|
- type: cos_sim_f1 |
|
value: 62.491349480968864 |
|
- type: cos_sim_precision |
|
value: 45.44539506794162 |
|
- type: cos_sim_recall |
|
value: 100.0 |
|
- type: dot_accuracy |
|
value: 63.0 |
|
- type: dot_ap |
|
value: 62.83128860568098 |
|
- type: dot_f1 |
|
value: 62.491349480968864 |
|
- type: dot_precision |
|
value: 45.44539506794162 |
|
- type: dot_recall |
|
value: 100.0 |
|
- type: euclidean_accuracy |
|
value: 63.0 |
|
- type: euclidean_ap |
|
value: 62.842229411681984 |
|
- type: euclidean_f1 |
|
value: 62.491349480968864 |
|
- type: euclidean_precision |
|
value: 45.44539506794162 |
|
- type: euclidean_recall |
|
value: 100.0 |
|
- type: manhattan_accuracy |
|
value: 63.0 |
|
- type: manhattan_ap |
|
value: 62.83631065292994 |
|
- type: manhattan_f1 |
|
value: 62.491349480968864 |
|
- type: manhattan_precision |
|
value: 45.44539506794162 |
|
- type: manhattan_recall |
|
value: 100.0 |
|
- type: max_accuracy |
|
value: 63.0 |
|
- type: max_ap |
|
value: 62.842229411681984 |
|
- type: max_f1 |
|
value: 62.491349480968864 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: Lajavaness/SICK-fr |
|
name: MTEB SICKFr |
|
config: default |
|
split: test |
|
revision: e077ab4cf4774a1e36d86d593b150422fafd8e8a |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 84.65226798174751 |
|
- type: cos_sim_spearman |
|
value: 78.46069171893217 |
|
- type: euclidean_pearson |
|
value: 82.24338215489338 |
|
- type: euclidean_spearman |
|
value: 78.46069230414263 |
|
- type: manhattan_pearson |
|
value: 82.19430457441406 |
|
- type: manhattan_spearman |
|
value: 78.39600534130474 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/sts22-crosslingual-sts |
|
name: MTEB STS22 (fr) |
|
config: fr |
|
split: test |
|
revision: de9d86b3b84231dc21f76c7b7af1f28e2f57f6e3 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 84.34356343286501 |
|
- type: cos_sim_spearman |
|
value: 83.82441862674773 |
|
- type: euclidean_pearson |
|
value: 83.36025657327927 |
|
- type: euclidean_spearman |
|
value: 83.82441862674773 |
|
- type: manhattan_pearson |
|
value: 83.28632889698486 |
|
- type: manhattan_spearman |
|
value: 83.72086058674401 |
|
- task: |
|
type: STS |
|
dataset: |
|
type: mteb/stsb_multi_mt |
|
name: MTEB STSBenchmarkMultilingualSTS (fr) |
|
config: fr |
|
split: test |
|
revision: 29afa2569dcedaaa2fe6a3dcfebab33d28b82e8c |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 85.61138706775607 |
|
- type: cos_sim_spearman |
|
value: 86.79352172029321 |
|
- type: euclidean_pearson |
|
value: 85.83986489860736 |
|
- type: euclidean_spearman |
|
value: 86.79352162100042 |
|
- type: manhattan_pearson |
|
value: 85.7236873261734 |
|
- type: manhattan_spearman |
|
value: 86.66968689546516 |
|
- task: |
|
type: Summarization |
|
dataset: |
|
type: lyon-nlp/summarization-summeval-fr-p2p |
|
name: MTEB SummEvalFr |
|
config: default |
|
split: test |
|
revision: b385812de6a9577b6f4d0f88c6a6e35395a94054 |
|
metrics: |
|
- type: cos_sim_pearson |
|
value: 30.185028827409205 |
|
- type: cos_sim_spearman |
|
value: 30.13786083775273 |
|
- type: dot_pearson |
|
value: 30.18503030285526 |
|
- type: dot_spearman |
|
value: 30.13786083775273 |
|
- task: |
|
type: Reranking |
|
dataset: |
|
type: lyon-nlp/mteb-fr-reranking-syntec-s2p |
|
name: MTEB SyntecReranking |
|
config: default |
|
split: test |
|
revision: daf0863838cd9e3ba50544cdce3ac2b338a1b0ad |
|
metrics: |
|
- type: map |
|
value: 88.89444444444443 |
|
- type: mrr |
|
value: 88.89444444444443 |
|
- type: nAUC_map_diff1 |
|
value: 65.57681789015096 |
|
- type: nAUC_map_max |
|
value: 11.774011617096468 |
|
- type: nAUC_mrr_diff1 |
|
value: 65.57681789015096 |
|
- type: nAUC_mrr_max |
|
value: 11.774011617096468 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: lyon-nlp/mteb-fr-retrieval-syntec-s2p |
|
name: MTEB SyntecRetrieval |
|
config: default |
|
split: test |
|
revision: 19661ccdca4dfc2d15122d776b61685f48c68ca9 |
|
metrics: |
|
- type: map_at_1 |
|
value: 71.0 |
|
- type: map_at_10 |
|
value: 81.3 |
|
- type: map_at_100 |
|
value: 81.407 |
|
- type: map_at_1000 |
|
value: 81.407 |
|
- type: map_at_20 |
|
value: 81.353 |
|
- type: map_at_3 |
|
value: 80.333 |
|
- type: map_at_5 |
|
value: 81.033 |
|
- type: mrr_at_1 |
|
value: 71.0 |
|
- type: mrr_at_10 |
|
value: 81.29999999999998 |
|
- type: mrr_at_100 |
|
value: 81.40672514619881 |
|
- type: mrr_at_1000 |
|
value: 81.40672514619881 |
|
- type: mrr_at_20 |
|
value: 81.35263157894735 |
|
- type: mrr_at_3 |
|
value: 80.33333333333333 |
|
- type: mrr_at_5 |
|
value: 81.03333333333333 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 65.56551939236816 |
|
- type: nauc_map_at_1000_max |
|
value: 21.060745704748204 |
|
- type: nauc_map_at_100_diff1 |
|
value: 65.56551939236816 |
|
- type: nauc_map_at_100_max |
|
value: 21.060745704748204 |
|
- type: nauc_map_at_10_diff1 |
|
value: 65.55980069698242 |
|
- type: nauc_map_at_10_max |
|
value: 21.3190442929788 |
|
- type: nauc_map_at_1_diff1 |
|
value: 67.21642606971449 |
|
- type: nauc_map_at_1_max |
|
value: 19.793191631302918 |
|
- type: nauc_map_at_20_diff1 |
|
value: 65.538721219245 |
|
- type: nauc_map_at_20_max |
|
value: 21.070102756046573 |
|
- type: nauc_map_at_3_diff1 |
|
value: 66.48655081074173 |
|
- type: nauc_map_at_3_max |
|
value: 22.33715748971969 |
|
- type: nauc_map_at_5_diff1 |
|
value: 65.4498092196869 |
|
- type: nauc_map_at_5_max |
|
value: 22.041207079018868 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 65.56551939236816 |
|
- type: nauc_mrr_at_1000_max |
|
value: 21.060745704748204 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 65.56551939236816 |
|
- type: nauc_mrr_at_100_max |
|
value: 21.060745704748204 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 65.55980069698242 |
|
- type: nauc_mrr_at_10_max |
|
value: 21.3190442929788 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 67.21642606971449 |
|
- type: nauc_mrr_at_1_max |
|
value: 19.793191631302918 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 65.538721219245 |
|
- type: nauc_mrr_at_20_max |
|
value: 21.070102756046573 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 66.48655081074173 |
|
- type: nauc_mrr_at_3_max |
|
value: 22.33715748971969 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 65.4498092196869 |
|
- type: nauc_mrr_at_5_max |
|
value: 22.041207079018868 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 65.37799652661094 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 21.618775539952175 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 65.37799652661094 |
|
- type: nauc_ndcg_at_100_max |
|
value: 21.618775539952175 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 65.24121131711044 |
|
- type: nauc_ndcg_at_10_max |
|
value: 23.00629044068508 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 67.21642606971449 |
|
- type: nauc_ndcg_at_1_max |
|
value: 19.793191631302918 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 65.11745065699384 |
|
- type: nauc_ndcg_at_20_max |
|
value: 21.64133163322825 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 66.99908176973135 |
|
- type: nauc_ndcg_at_3_max |
|
value: 25.59125363095015 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 64.80888193232458 |
|
- type: nauc_ndcg_at_5_max |
|
value: 25.161787586855322 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: nan |
|
- type: nauc_precision_at_1000_max |
|
value: nan |
|
- type: nauc_precision_at_100_diff1 |
|
value: nan |
|
- type: nauc_precision_at_100_max |
|
value: nan |
|
- type: nauc_precision_at_10_diff1 |
|
value: 61.50015561780299 |
|
- type: nauc_precision_at_10_max |
|
value: 47.88359788359829 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 67.21642606971449 |
|
- type: nauc_precision_at_1_max |
|
value: 19.793191631302918 |
|
- type: nauc_precision_at_20_diff1 |
|
value: 56.13912231559286 |
|
- type: nauc_precision_at_20_max |
|
value: 21.82539682539744 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 70.79831932773126 |
|
- type: nauc_precision_at_3_max |
|
value: 47.46148459383747 |
|
- type: nauc_precision_at_5_diff1 |
|
value: 58.50606909430468 |
|
- type: nauc_precision_at_5_max |
|
value: 57.19887955182096 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: nan |
|
- type: nauc_recall_at_1000_max |
|
value: nan |
|
- type: nauc_recall_at_100_diff1 |
|
value: nan |
|
- type: nauc_recall_at_100_max |
|
value: nan |
|
- type: nauc_recall_at_10_diff1 |
|
value: 61.500155617802555 |
|
- type: nauc_recall_at_10_max |
|
value: 47.88359788359823 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 67.21642606971449 |
|
- type: nauc_recall_at_1_max |
|
value: 19.793191631302918 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 56.13912231559305 |
|
- type: nauc_recall_at_20_max |
|
value: 21.825396825396858 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 70.79831932773116 |
|
- type: nauc_recall_at_3_max |
|
value: 47.461484593837426 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 58.506069094304394 |
|
- type: nauc_recall_at_5_max |
|
value: 57.19887955182054 |
|
- type: ndcg_at_1 |
|
value: 71.0 |
|
- type: ndcg_at_10 |
|
value: 85.226 |
|
- type: ndcg_at_100 |
|
value: 85.839 |
|
- type: ndcg_at_1000 |
|
value: 85.839 |
|
- type: ndcg_at_20 |
|
value: 85.458 |
|
- type: ndcg_at_3 |
|
value: 83.333 |
|
- type: ndcg_at_5 |
|
value: 84.58099999999999 |
|
- type: precision_at_1 |
|
value: 71.0 |
|
- type: precision_at_10 |
|
value: 9.700000000000001 |
|
- type: precision_at_100 |
|
value: 1.0 |
|
- type: precision_at_1000 |
|
value: 0.1 |
|
- type: precision_at_20 |
|
value: 4.9 |
|
- type: precision_at_3 |
|
value: 30.667 |
|
- type: precision_at_5 |
|
value: 19.0 |
|
- type: recall_at_1 |
|
value: 71.0 |
|
- type: recall_at_10 |
|
value: 97.0 |
|
- type: recall_at_100 |
|
value: 100.0 |
|
- type: recall_at_1000 |
|
value: 100.0 |
|
- type: recall_at_20 |
|
value: 98.0 |
|
- type: recall_at_3 |
|
value: 92.0 |
|
- type: recall_at_5 |
|
value: 95.0 |
|
- task: |
|
type: Retrieval |
|
dataset: |
|
type: jinaai/xpqa |
|
name: MTEB XPQARetrieval (fr) |
|
config: fr |
|
split: test |
|
revision: c99d599f0a6ab9b85b065da6f9d94f9cf731679f |
|
metrics: |
|
- type: map_at_1 |
|
value: 40.668 |
|
- type: map_at_10 |
|
value: 63.29900000000001 |
|
- type: map_at_100 |
|
value: 64.628 |
|
- type: map_at_1000 |
|
value: 64.683 |
|
- type: map_at_20 |
|
value: 64.156 |
|
- type: map_at_3 |
|
value: 56.858 |
|
- type: map_at_5 |
|
value: 61.072 |
|
- type: mrr_at_1 |
|
value: 63.28437917222964 |
|
- type: mrr_at_10 |
|
value: 71.24700659079828 |
|
- type: mrr_at_100 |
|
value: 71.73622475819593 |
|
- type: mrr_at_1000 |
|
value: 71.7489306936674 |
|
- type: mrr_at_20 |
|
value: 71.54825584541467 |
|
- type: mrr_at_3 |
|
value: 69.55941255006672 |
|
- type: mrr_at_5 |
|
value: 70.47396528704935 |
|
- type: nauc_map_at_1000_diff1 |
|
value: 43.35620080035366 |
|
- type: nauc_map_at_1000_max |
|
value: 50.28640921325736 |
|
- type: nauc_map_at_100_diff1 |
|
value: 43.31973658913103 |
|
- type: nauc_map_at_100_max |
|
value: 50.273258626884484 |
|
- type: nauc_map_at_10_diff1 |
|
value: 43.027049566983536 |
|
- type: nauc_map_at_10_max |
|
value: 49.577710318540966 |
|
- type: nauc_map_at_1_diff1 |
|
value: 54.26507755550101 |
|
- type: nauc_map_at_1_max |
|
value: 29.3055004033253 |
|
- type: nauc_map_at_20_diff1 |
|
value: 43.21822622085122 |
|
- type: nauc_map_at_20_max |
|
value: 50.055159148215544 |
|
- type: nauc_map_at_3_diff1 |
|
value: 46.17179722912072 |
|
- type: nauc_map_at_3_max |
|
value: 43.098861622889245 |
|
- type: nauc_map_at_5_diff1 |
|
value: 43.417118302901045 |
|
- type: nauc_map_at_5_max |
|
value: 47.855182277192995 |
|
- type: nauc_mrr_at_1000_diff1 |
|
value: 53.264166874886484 |
|
- type: nauc_mrr_at_1000_max |
|
value: 60.06399045079078 |
|
- type: nauc_mrr_at_100_diff1 |
|
value: 53.25723295738035 |
|
- type: nauc_mrr_at_100_max |
|
value: 60.064446692426365 |
|
- type: nauc_mrr_at_10_diff1 |
|
value: 53.27175189594254 |
|
- type: nauc_mrr_at_10_max |
|
value: 60.0620551274014 |
|
- type: nauc_mrr_at_1_diff1 |
|
value: 55.382898728149954 |
|
- type: nauc_mrr_at_1_max |
|
value: 59.47364922562707 |
|
- type: nauc_mrr_at_20_diff1 |
|
value: 53.101546449165404 |
|
- type: nauc_mrr_at_20_max |
|
value: 59.98535813727071 |
|
- type: nauc_mrr_at_3_diff1 |
|
value: 53.83121615715132 |
|
- type: nauc_mrr_at_3_max |
|
value: 60.86140499580485 |
|
- type: nauc_mrr_at_5_diff1 |
|
value: 53.17340169131113 |
|
- type: nauc_mrr_at_5_max |
|
value: 60.323733961935865 |
|
- type: nauc_ndcg_at_1000_diff1 |
|
value: 45.529862481919835 |
|
- type: nauc_ndcg_at_1000_max |
|
value: 54.19889340138254 |
|
- type: nauc_ndcg_at_100_diff1 |
|
value: 45.03283772116745 |
|
- type: nauc_ndcg_at_100_max |
|
value: 54.014933886963036 |
|
- type: nauc_ndcg_at_10_diff1 |
|
value: 43.69847706677576 |
|
- type: nauc_ndcg_at_10_max |
|
value: 51.997083339083474 |
|
- type: nauc_ndcg_at_1_diff1 |
|
value: 55.382898728149954 |
|
- type: nauc_ndcg_at_1_max |
|
value: 59.47364922562707 |
|
- type: nauc_ndcg_at_20_diff1 |
|
value: 43.97031810457665 |
|
- type: nauc_ndcg_at_20_max |
|
value: 52.75113969394979 |
|
- type: nauc_ndcg_at_3_diff1 |
|
value: 45.15249621607577 |
|
- type: nauc_ndcg_at_3_max |
|
value: 51.97757108163661 |
|
- type: nauc_ndcg_at_5_diff1 |
|
value: 44.01197180455844 |
|
- type: nauc_ndcg_at_5_max |
|
value: 50.4940600552972 |
|
- type: nauc_precision_at_1000_diff1 |
|
value: -21.744958100458017 |
|
- type: nauc_precision_at_1000_max |
|
value: 17.992122779928053 |
|
- type: nauc_precision_at_100_diff1 |
|
value: -19.676955126243957 |
|
- type: nauc_precision_at_100_max |
|
value: 21.92261529052923 |
|
- type: nauc_precision_at_10_diff1 |
|
value: -12.153879041711848 |
|
- type: nauc_precision_at_10_max |
|
value: 30.632660221696995 |
|
- type: nauc_precision_at_1_diff1 |
|
value: 55.382898728149954 |
|
- type: nauc_precision_at_1_max |
|
value: 59.47364922562707 |
|
- type: nauc_precision_at_20_diff1 |
|
value: -15.083687263517998 |
|
- type: nauc_precision_at_20_max |
|
value: 26.855087773361202 |
|
- type: nauc_precision_at_3_diff1 |
|
value: 2.4635804150765113 |
|
- type: nauc_precision_at_3_max |
|
value: 41.11369929685033 |
|
- type: nauc_precision_at_5_diff1 |
|
value: -6.912714357985636 |
|
- type: nauc_precision_at_5_max |
|
value: 35.72995297460379 |
|
- type: nauc_recall_at_1000_diff1 |
|
value: 71.02370020243924 |
|
- type: nauc_recall_at_1000_max |
|
value: 27.48289323103369 |
|
- type: nauc_recall_at_100_diff1 |
|
value: 29.646214405433696 |
|
- type: nauc_recall_at_100_max |
|
value: 44.07221611142022 |
|
- type: nauc_recall_at_10_diff1 |
|
value: 31.939036367001002 |
|
- type: nauc_recall_at_10_max |
|
value: 41.20048321364925 |
|
- type: nauc_recall_at_1_diff1 |
|
value: 54.26507755550101 |
|
- type: nauc_recall_at_1_max |
|
value: 29.3055004033253 |
|
- type: nauc_recall_at_20_diff1 |
|
value: 29.698861624429636 |
|
- type: nauc_recall_at_20_max |
|
value: 41.33416829563071 |
|
- type: nauc_recall_at_3_diff1 |
|
value: 41.73527831566349 |
|
- type: nauc_recall_at_3_max |
|
value: 38.73426347266254 |
|
- type: nauc_recall_at_5_diff1 |
|
value: 35.44302402135149 |
|
- type: nauc_recall_at_5_max |
|
value: 42.141691917800586 |
|
- type: ndcg_at_1 |
|
value: 63.284 |
|
- type: ndcg_at_10 |
|
value: 69.503 |
|
- type: ndcg_at_100 |
|
value: 73.687 |
|
- type: ndcg_at_1000 |
|
value: 74.52499999999999 |
|
- type: ndcg_at_20 |
|
value: 71.50800000000001 |
|
- type: ndcg_at_3 |
|
value: 64.434 |
|
- type: ndcg_at_5 |
|
value: 65.996 |
|
- type: precision_at_1 |
|
value: 63.284 |
|
- type: precision_at_10 |
|
value: 16.048000000000002 |
|
- type: precision_at_100 |
|
value: 1.955 |
|
- type: precision_at_1000 |
|
value: 0.20600000000000002 |
|
- type: precision_at_20 |
|
value: 8.778 |
|
- type: precision_at_3 |
|
value: 39.163 |
|
- type: precision_at_5 |
|
value: 28.037 |
|
- type: recall_at_1 |
|
value: 40.668 |
|
- type: recall_at_10 |
|
value: 78.956 |
|
- type: recall_at_100 |
|
value: 94.504 |
|
- type: recall_at_1000 |
|
value: 99.833 |
|
- type: recall_at_20 |
|
value: 85.085 |
|
- type: recall_at_3 |
|
value: 62.379 |
|
- type: recall_at_5 |
|
value: 70.254 |
|
license: apache-2.0 |
|
language: |
|
- fr |
|
metrics: |
|
- pearsonr |
|
- spearmanr |
|
--- |
|
|
|
# [bilingual-document-embedding](https://huggingface.co/Lajavaness/bilingual-document-embedding) |
|
|
|
bilingual-document-embedding is an embedding model for documents in two languages, French and English, with a context length of up to 8096 tokens. It is a specialized sentence-embedding model trained specifically for this language pair, leveraging the robust capabilities of [BGE M3](https://huggingface.co/BAAI/bge-m3), a pre-trained language model based on the [BGE M3](https://huggingface.co/BAAI/bge-m3) architecture. The model uses XLM-RoBERTa to encode English and French sentences into a 1024-dimensional vector space, facilitating a wide range of applications from semantic search to text clustering. The embeddings capture the nuanced meanings of English and French sentences, reflecting both the lexical and contextual layers of the language. |
|
|
|
|
|
## Full Model Architecture |
|
``` |
|
SentenceTransformer( |
|
(0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BilingualModel |
|
(1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True}) |
|
(2): Normalize() |
|
) |
|
``` |
|
|
|
## Training and Fine-tuning process |
|
### Stage 1: NLI Training |
|
- Dataset: SNLI + XNLI (English and French) |
|
- Method: Training using Multiple Negatives Ranking Loss. This stage focused on improving the model's ability to discern and rank nuanced differences in sentence semantics. |
|
### Stage 3: Continued Fine-tuning for Semantic Textual Similarity on STS Benchmark |
|
- Dataset: STSB (French and English) |
|
- Method: Fine-tuning specifically for the semantic textual similarity benchmark using Siamese BERT-Networks configured with the 'sentence-transformers' library. |
|
### Stage 4: Advanced Augmentation Fine-tuning |
|
- Dataset: STSB augmented with [silver samples generated from gold samples](https://www.sbert.net/examples/training/data_augmentation/README.html) |
|
- Method: Employed an advanced strategy using [Augmented SBERT](https://arxiv.org/abs/2010.08240) with Pair Sampling Strategies, integrating both Cross-Encoder and Bi-Encoder models. This stage further refined the embeddings by enriching the training data dynamically, enhancing the model's robustness and accuracy. |
|
|
|
|
|
## Usage: |
|
|
|
Using this model becomes easy when you have [sentence-transformers](https://www.SBERT.net) installed: |
|
|
|
``` |
|
pip install -U sentence-transformers |
|
``` |
|
|
|
Then you can use the model like this: |
|
|
|
```python |
|
from sentence_transformers import SentenceTransformer |
|
|
|
sentences = ["Paris est une capitale de la France", "Paris is a capital of France"] |
|
|
|
model = SentenceTransformer('Lajavaness/bilingual-document-embedding', trust_remote_code=True) |
|
embeddings = model.encode(sentences) |
|
print(embeddings) |
|
|
|
``` |
|
|
|
|
|
|
|
|
|
|
|
## Evaluation |
|
|
|
TODO |
|
|
|
## Citation |
|
@article{chen2024bge, |
|
title={Bge m3-embedding: Multi-lingual, multi-functionality, multi-granularity text embeddings through self-knowledge distillation}, |
|
author={Chen, Jianlv and Xiao, Shitao and Zhang, Peitian and Luo, Kun and Lian, Defu and Liu, Zheng}, |
|
journal={arXiv preprint arXiv:2402.03216}, |
|
year={2024} |
|
} |
|
|
|
@article{conneau2019unsupervised, |
|
title={Unsupervised cross-lingual representation learning at scale}, |
|
author={Conneau, Alexis and Khandelwal, Kartikay and Goyal, Naman and Chaudhary, Vishrav and Wenzek, Guillaume and Guzm{\'a}n, Francisco and Grave, Edouard and Ott, Myle and Zettlemoyer, Luke and Stoyanov, Veselin}, |
|
journal={arXiv preprint arXiv:1911.02116}, |
|
year={2019} |
|
} |
|
|
|
@article{reimers2019sentence, |
|
title={Sentence-BERT: Sentence Embeddings using Siamese BERT-Networks}, |
|
author={Reimers, Nils and Gurevych, Iryna}, |
|
journal={arXiv preprint arXiv:1908.10084}, |
|
year={2019} |
|
} |
|
|
|
@article{thakur2020augmented, |
|
title={Augmented SBERT: Data Augmentation Method for Improving Bi-Encoders for Pairwise Sentence Scoring Tasks}, |
|
author={Thakur, Nandan and Reimers, Nils and Daxenberger, Johannes and Gurevych, Iryna}, |
|
journal={arXiv e-prints}, |
|
pages={arXiv--2010}, |
|
year={2020} |