---
library_name: sentence-transformers
model-index:
- name: XYZ-embedding-zh
  results:
  - dataset:
      config: default
      name: MTEB CMedQAv1
      revision: None
      split: test
      type: C-MTEB/CMedQAv1-reranking
    metrics:
    - type: map
      value: 89.61792115239176
    - type: mrr
      value: 91.46722222222222
    - type: main_score
      value: 89.61792115239176
    task:
      type: Reranking
  - dataset:
      config: default
      name: MTEB CMedQAv2
      revision: None
      split: test
      type: C-MTEB/CMedQAv2-reranking
    metrics:
    - type: map
      value: 89.22040591564271
    - type: mrr
      value: 91.2995238095238
    - type: main_score
      value: 89.22040591564271
    task:
      type: Reranking
  - dataset:
      config: default
      name: MTEB CmedqaRetrieval
      revision: None
      split: dev
      type: C-MTEB/CmedqaRetrieval
    metrics:
    - type: map_at_1
      value: 27.939000000000004
    - type: map_at_10
      value: 41.227999999999994
    - type: map_at_100
      value: 43.018
    - type: map_at_1000
      value: 43.120000000000005
    - type: map_at_3
      value: 36.895
    - type: map_at_5
      value: 39.373999999999995
    - type: mrr_at_1
      value: 42.136
    - type: mrr_at_10
      value: 50.394000000000005
    - type: mrr_at_100
      value: 51.288
    - type: mrr_at_1000
      value: 51.324000000000005
    - type: mrr_at_3
      value: 47.887
    - type: mrr_at_5
      value: 49.362
    - type: ndcg_at_1
      value: 42.136
    - type: ndcg_at_10
      value: 47.899
    - type: ndcg_at_100
      value: 54.730999999999995
    - type: ndcg_at_1000
      value: 56.462999999999994
    - type: ndcg_at_3
      value: 42.66
    - type: ndcg_at_5
      value: 44.913
    - type: precision_at_1
      value: 42.136
    - type: precision_at_10
      value: 10.52
    - type: precision_at_100
      value: 1.6070000000000002
    - type: precision_at_1000
      value: 0.183
    - type: precision_at_3
      value: 24.064
    - type: precision_at_5
      value: 17.374000000000002
    - type: recall_at_1
      value: 27.939000000000004
    - type: recall_at_10
      value: 58.29600000000001
    - type: recall_at_100
      value: 86.504
    - type: recall_at_1000
      value: 98.105
    - type: recall_at_3
      value: 42.475
    - type: recall_at_5
      value: 49.454
    - type: main_score
      value: 47.899
    task:
      type: Retrieval
  - dataset:
      config: default
      name: MTEB CovidRetrieval
      revision: None
      split: dev
      type: C-MTEB/CovidRetrieval
    metrics:
    - type: map_at_1
      value: 77.371
    - type: map_at_10
      value: 85.229
    - type: map_at_100
      value: 85.358
    - type: map_at_1000
      value: 85.36
    - type: map_at_3
      value: 84.176
    - type: map_at_5
      value: 84.79299999999999
    - type: mrr_at_1
      value: 77.661
    - type: mrr_at_10
      value: 85.207
    - type: mrr_at_100
      value: 85.33699999999999
    - type: mrr_at_1000
      value: 85.339
    - type: mrr_at_3
      value: 84.229
    - type: mrr_at_5
      value: 84.79299999999999
    - type: ndcg_at_1
      value: 77.766
    - type: ndcg_at_10
      value: 88.237
    - type: ndcg_at_100
      value: 88.777
    - type: ndcg_at_1000
      value: 88.818
    - type: ndcg_at_3
      value: 86.16
    - type: ndcg_at_5
      value: 87.22
    - type: precision_at_1
      value: 77.766
    - type: precision_at_10
      value: 9.841999999999999
    - type: precision_at_100
      value: 1.0070000000000001
    - type: precision_at_1000
      value: 0.101
    - type: precision_at_3
      value: 30.875000000000004
    - type: precision_at_5
      value: 19.073
    - type: recall_at_1
      value: 77.371
    - type: recall_at_10
      value: 97.366
    - type: recall_at_100
      value: 99.684
    - type: recall_at_1000
      value: 100.0
    - type: recall_at_3
      value: 91.702
    - type: recall_at_5
      value: 94.31
    - type: main_score
      value: 88.237
    task:
      type: Retrieval
  - dataset:
      config: default
      name: MTEB DuRetrieval
      revision: None
      split: dev
      type: C-MTEB/DuRetrieval
    metrics:
    - type: map_at_1
      value: 27.772000000000002
    - type: map_at_10
      value: 84.734
    - type: map_at_100
      value: 87.298
    - type: map_at_1000
      value: 87.32900000000001
    - type: map_at_3
      value: 59.431
    - type: map_at_5
      value: 74.82900000000001
    - type: mrr_at_1
      value: 93.65
    - type: mrr_at_10
      value: 95.568
    - type: mrr_at_100
      value: 95.608
    - type: mrr_at_1000
      value: 95.609
    - type: mrr_at_3
      value: 95.267
    - type: mrr_at_5
      value: 95.494
    - type: ndcg_at_1
      value: 93.65
    - type: ndcg_at_10
      value: 90.794
    - type: ndcg_at_100
      value: 92.88300000000001
    - type: ndcg_at_1000
      value: 93.144
    - type: ndcg_at_3
      value: 90.32
    - type: ndcg_at_5
      value: 89.242
    - type: precision_at_1
      value: 93.65
    - type: precision_at_10
      value: 42.9
    - type: precision_at_100
      value: 4.835
    - type: precision_at_1000
      value: 0.49
    - type: precision_at_3
      value: 80.85
    - type: precision_at_5
      value: 68.14
    - type: recall_at_1
      value: 27.772000000000002
    - type: recall_at_10
      value: 91.183
    - type: recall_at_100
      value: 98.219
    - type: recall_at_1000
      value: 99.55000000000001
    - type: recall_at_3
      value: 60.911
    - type: recall_at_5
      value: 78.31099999999999
    - type: main_score
      value: 90.794
    task:
      type: Retrieval
  - dataset:
      config: default
      name: MTEB EcomRetrieval
      revision: None
      split: dev
      type: C-MTEB/EcomRetrieval
    metrics:
    - type: map_at_1
      value: 54.6
    - type: map_at_10
      value: 64.742
    - type: map_at_100
      value: 65.289
    - type: map_at_1000
      value: 65.29700000000001
    - type: map_at_3
      value: 62.183
    - type: map_at_5
      value: 63.883
    - type: mrr_at_1
      value: 54.6
    - type: mrr_at_10
      value: 64.742
    - type: mrr_at_100
      value: 65.289
    - type: mrr_at_1000
      value: 65.29700000000001
    - type: mrr_at_3
      value: 62.183
    - type: mrr_at_5
      value: 63.883
    - type: ndcg_at_1
      value: 54.6
    - type: ndcg_at_10
      value: 69.719
    - type: ndcg_at_100
      value: 72.148
    - type: ndcg_at_1000
      value: 72.393
    - type: ndcg_at_3
      value: 64.606
    - type: ndcg_at_5
      value: 67.682
    - type: precision_at_1
      value: 54.6
    - type: precision_at_10
      value: 8.53
    - type: precision_at_100
      value: 0.962
    - type: precision_at_1000
      value: 0.098
    - type: precision_at_3
      value: 23.867
    - type: precision_at_5
      value: 15.82
    - type: recall_at_1
      value: 54.6
    - type: recall_at_10
      value: 85.3
    - type: recall_at_100
      value: 96.2
    - type: recall_at_1000
      value: 98.2
    - type: recall_at_3
      value: 71.6
    - type: recall_at_5
      value: 79.10000000000001
    - type: main_score
      value: 69.719
    task:
      type: Retrieval
  - dataset:
      config: default
      name: MTEB MMarcoReranking
      revision: None
      split: dev
      type: C-MTEB/Mmarco-reranking
    metrics:
    - type: map
      value: 35.30260957061897
    - type: mrr
      value: 34.098015873015875
    - type: main_score
      value: 35.30260957061897
    task:
      type: Reranking
  - dataset:
      config: default
      name: MTEB MMarcoRetrieval
      revision: None
      split: dev
      type: C-MTEB/MMarcoRetrieval
    metrics:
    - type: map_at_1
      value: 69.51899999999999
    - type: map_at_10
      value: 78.816
    - type: map_at_100
      value: 79.08500000000001
    - type: map_at_1000
      value: 79.091
    - type: map_at_3
      value: 76.999
    - type: map_at_5
      value: 78.194
    - type: mrr_at_1
      value: 71.80499999999999
    - type: mrr_at_10
      value: 79.29899999999999
    - type: mrr_at_100
      value: 79.532
    - type: mrr_at_1000
      value: 79.537
    - type: mrr_at_3
      value: 77.703
    - type: mrr_at_5
      value: 78.75999999999999
    - type: ndcg_at_1
      value: 71.80499999999999
    - type: ndcg_at_10
      value: 82.479
    - type: ndcg_at_100
      value: 83.611
    - type: ndcg_at_1000
      value: 83.76400000000001
    - type: ndcg_at_3
      value: 79.065
    - type: ndcg_at_5
      value: 81.092
    - type: precision_at_1
      value: 71.80499999999999
    - type: precision_at_10
      value: 9.91
    - type: precision_at_100
      value: 1.046
    - type: precision_at_1000
      value: 0.106
    - type: precision_at_3
      value: 29.727999999999998
    - type: precision_at_5
      value: 18.908
    - type: recall_at_1
      value: 69.51899999999999
    - type: recall_at_10
      value: 93.24
    - type: recall_at_100
      value: 98.19099999999999
    - type: recall_at_1000
      value: 99.36500000000001
    - type: recall_at_3
      value: 84.308
    - type: recall_at_5
      value: 89.119
    - type: main_score
      value: 82.479
    task:
      type: Retrieval
  - dataset:
      config: default
      name: MTEB MedicalRetrieval
      revision: None
      split: dev
      type: C-MTEB/MedicalRetrieval
    metrics:
    - type: map_at_1
      value: 57.8
    - type: map_at_10
      value: 64.215
    - type: map_at_100
      value: 64.78
    - type: map_at_1000
      value: 64.81099999999999
    - type: map_at_3
      value: 62.64999999999999
    - type: map_at_5
      value: 63.57000000000001
    - type: mrr_at_1
      value: 58.099999999999994
    - type: mrr_at_10
      value: 64.371
    - type: mrr_at_100
      value: 64.936
    - type: mrr_at_1000
      value: 64.96600000000001
    - type: mrr_at_3
      value: 62.8
    - type: mrr_at_5
      value: 63.739999999999995
    - type: ndcg_at_1
      value: 57.8
    - type: ndcg_at_10
      value: 67.415
    - type: ndcg_at_100
      value: 70.38799999999999
    - type: ndcg_at_1000
      value: 71.229
    - type: ndcg_at_3
      value: 64.206
    - type: ndcg_at_5
      value: 65.858
    - type: precision_at_1
      value: 57.8
    - type: precision_at_10
      value: 7.75
    - type: precision_at_100
      value: 0.919
    - type: precision_at_1000
      value: 0.099
    - type: precision_at_3
      value: 22.900000000000002
    - type: precision_at_5
      value: 14.540000000000001
    - type: recall_at_1
      value: 57.8
    - type: recall_at_10
      value: 77.5
    - type: recall_at_100
      value: 91.9
    - type: recall_at_1000
      value: 98.6
    - type: recall_at_3
      value: 68.7
    - type: recall_at_5
      value: 72.7
    - type: main_score
      value: 67.415
    task:
      type: Retrieval
  - dataset:
      config: default
      name: MTEB T2Reranking
      revision: None
      split: dev
      type: C-MTEB/T2Reranking
    metrics:
    - type: map
      value: 69.06615146698508
    - type: mrr
      value: 79.7588755091294
    - type: main_score
      value: 69.06615146698508
    task:
      type: Reranking
  - dataset:
      config: default
      name: MTEB T2Retrieval
      revision: None
      split: dev
      type: C-MTEB/T2Retrieval
    metrics:
    - type: map_at_1
      value: 28.084999999999997
    - type: map_at_10
      value: 78.583
    - type: map_at_100
      value: 82.14399999999999
    - type: map_at_1000
      value: 82.204
    - type: map_at_3
      value: 55.422000000000004
    - type: map_at_5
      value: 67.973
    - type: mrr_at_1
      value: 91.014
    - type: mrr_at_10
      value: 93.381
    - type: mrr_at_100
      value: 93.45400000000001
    - type: mrr_at_1000
      value: 93.45599999999999
    - type: mrr_at_3
      value: 92.99300000000001
    - type: mrr_at_5
      value: 93.234
    - type: ndcg_at_1
      value: 91.014
    - type: ndcg_at_10
      value: 85.931
    - type: ndcg_at_100
      value: 89.31
    - type: ndcg_at_1000
      value: 89.869
    - type: ndcg_at_3
      value: 87.348
    - type: ndcg_at_5
      value: 85.929
    - type: precision_at_1
      value: 91.014
    - type: precision_at_10
      value: 42.495
    - type: precision_at_100
      value: 5.029999999999999
    - type: precision_at_1000
      value: 0.516
    - type: precision_at_3
      value: 76.248
    - type: precision_at_5
      value: 63.817
    - type: recall_at_1
      value: 28.084999999999997
    - type: recall_at_10
      value: 84.88
    - type: recall_at_100
      value: 95.902
    - type: recall_at_1000
      value: 98.699
    - type: recall_at_3
      value: 57.113
    - type: recall_at_5
      value: 71.251
    - type: main_score
      value: 85.931
    task:
      type: Retrieval
  - dataset:
      config: default
      name: MTEB VideoRetrieval
      revision: None
      split: dev
      type: C-MTEB/VideoRetrieval
    metrics:
    - type: map_at_1
      value: 66.4
    - type: map_at_10
      value: 75.86
    - type: map_at_100
      value: 76.185
    - type: map_at_1000
      value: 76.188
    - type: map_at_3
      value: 74.167
    - type: map_at_5
      value: 75.187
    - type: mrr_at_1
      value: 66.4
    - type: mrr_at_10
      value: 75.86
    - type: mrr_at_100
      value: 76.185
    - type: mrr_at_1000
      value: 76.188
    - type: mrr_at_3
      value: 74.167
    - type: mrr_at_5
      value: 75.187
    - type: ndcg_at_1
      value: 66.4
    - type: ndcg_at_10
      value: 80.03099999999999
    - type: ndcg_at_100
      value: 81.459
    - type: ndcg_at_1000
      value: 81.527
    - type: ndcg_at_3
      value: 76.621
    - type: ndcg_at_5
      value: 78.446
    - type: precision_at_1
      value: 66.4
    - type: precision_at_10
      value: 9.29
    - type: precision_at_100
      value: 0.992
    - type: precision_at_1000
      value: 0.1
    - type: precision_at_3
      value: 27.900000000000002
    - type: precision_at_5
      value: 17.62
    - type: recall_at_1
      value: 66.4
    - type: recall_at_10
      value: 92.9
    - type: recall_at_100
      value: 99.2
    - type: recall_at_1000
      value: 99.7
    - type: recall_at_3
      value: 83.7
    - type: recall_at_5
      value: 88.1
    - type: main_score
      value: 80.03099999999999
    task:
      type: Retrieval
pipeline_tag: sentence-similarity
tags:
- sentence-transformers
- feature-extraction
- sentence-similarity
- mteb
---
# XYZ-embedding-zh

This is a [sentence-transformers](https://www.SBERT.net) model: it maps sentences and paragraphs to a 1792-dimensional dense vector space and can be used for tasks such as clustering or semantic search.

## Usage (Sentence-Transformers)

Using this model is straightforward once [sentence-transformers](https://www.SBERT.net) is installed:

```
pip install -U sentence-transformers
```

Then you can use the model like this:
```python
from sentence_transformers import SentenceTransformer

sentences = ["This is an example sentence", "Each sentence is converted"]

# Load the model from the Hugging Face Hub
model = SentenceTransformer('fangxq/XYZ-embedding-zh')

# Encode each sentence into a 1792-dimensional dense vector
embeddings = model.encode(sentences)
print(embeddings)
```
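
Beyond printing raw vectors, the embeddings can be compared directly, for example for the semantic-search use case mentioned above. The following is a minimal sketch using `sentence_transformers.util.cos_sim`; the Chinese query and corpus are illustrative placeholders, not data associated with this model:

```python
from sentence_transformers import SentenceTransformer, util

model = SentenceTransformer('fangxq/XYZ-embedding-zh')

# Illustrative corpus and query (placeholder sentences)
corpus = ["今天天气很好", "我喜欢看电影", "这家餐厅的菜很好吃"]
query = "天气怎么样"

corpus_embeddings = model.encode(corpus)
query_embedding = model.encode(query)

# Cosine similarity between the query and each corpus sentence
scores = util.cos_sim(query_embedding, corpus_embeddings)
print(scores)
```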
## Evaluation Results

For an automated evaluation of this model, see the *Sentence Embeddings Benchmark*: [https://seb.sbert.net](https://seb.sbert.net?model_name=fangxq/XYZ-embedding-zh). The scores listed in the metadata above were obtained on Chinese MTEB (C-MTEB) retrieval and reranking tasks.
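
The C-MTEB results can, in principle, be reproduced with the [mteb](https://github.com/embeddings-benchmark/mteb) package. Below is a minimal sketch, not the exact evaluation setup used for this card; the task selection is illustrative and task identifiers may vary between `mteb` releases.

```python
import mteb
from sentence_transformers import SentenceTransformer

# Load the embedding model to be evaluated
model = SentenceTransformer("fangxq/XYZ-embedding-zh")

# Illustrative subset of the C-MTEB tasks reported in the metadata above
tasks = mteb.get_tasks(tasks=["T2Retrieval", "DuRetrieval", "CovidRetrieval"])

evaluation = mteb.MTEB(tasks=tasks)
evaluation.run(model, output_folder="results/XYZ-embedding-zh")
```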
## Full Model Architecture

```
SentenceTransformer(
  (0): Transformer({'max_seq_length': 512, 'do_lower_case': False}) with Transformer model: BertModel
  (1): Pooling({'word_embedding_dimension': 1024, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
  (2): Dense({'in_features': 1024, 'out_features': 1792, 'bias': True, 'activation_function': 'torch.nn.modules.linear.Identity'})
)
```
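
As the module list shows, token embeddings from the 1024-dimensional BERT encoder are mean-pooled and then projected by the final Dense layer to 1792 dimensions. A quick sanity check of the output dimensionality, as a minimal sketch using standard sentence-transformers accessors:

```python
from sentence_transformers import SentenceTransformer

model = SentenceTransformer("fangxq/XYZ-embedding-zh")

# Output size of the final Dense module: should print 1792
print(model.get_sentence_embedding_dimension())

# Maximum input length of the Transformer module: should print 512
print(model.max_seq_length)

# A single encoded sentence is a 1792-dimensional vector
print(model.encode("这是一个测试句子").shape)  # -> (1792,)
```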
## Citing & Authors
|