bwang0911 committed on
Commit
64a9a01
·
verified ·
1 Parent(s): 9e5cf6d

Add new SentenceTransformer model

Browse files
1_Pooling/config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "word_embedding_dimension": 384,
3
  "pooling_mode_cls_token": false,
4
  "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
 
1
  {
2
+ "word_embedding_dimension": 768,
3
  "pooling_mode_cls_token": false,
4
  "pooling_mode_mean_tokens": true,
5
  "pooling_mode_max_tokens": false,
README.md CHANGED
@@ -8,7 +8,7 @@ tags:
8
  - generated_from_trainer
9
  - dataset_size:53224
10
  - loss:MultipleNegativesRankingLoss
11
- base_model: sentence-transformers/all-MiniLM-L6-v2
12
  widget:
13
  - source_sentence: ' A juridical person may not be a partner of a civil law union. '
14
  sentences:
@@ -220,7 +220,7 @@ metrics:
220
  - cosine_mrr@10
221
  - cosine_map@100
222
  model-index:
223
- - name: SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
224
  results:
225
  - task:
226
  type: information-retrieval
@@ -230,49 +230,49 @@ model-index:
230
  type: mteb/AILA_casedocs
231
  metrics:
232
  - type: cosine_accuracy@1
233
- value: 0.26
234
  name: Cosine Accuracy@1
235
  - type: cosine_accuracy@3
236
- value: 0.36
237
  name: Cosine Accuracy@3
238
  - type: cosine_accuracy@5
239
- value: 0.38
240
  name: Cosine Accuracy@5
241
  - type: cosine_accuracy@10
242
- value: 0.58
243
  name: Cosine Accuracy@10
244
  - type: cosine_precision@1
245
- value: 0.26
246
  name: Cosine Precision@1
247
  - type: cosine_precision@3
248
- value: 0.2
249
  name: Cosine Precision@3
250
  - type: cosine_precision@5
251
- value: 0.14
252
  name: Cosine Precision@5
253
  - type: cosine_precision@10
254
- value: 0.10599999999999998
255
  name: Cosine Precision@10
256
  - type: cosine_recall@1
257
- value: 0.08253846153846153
258
  name: Cosine Recall@1
259
  - type: cosine_recall@3
260
- value: 0.183986013986014
261
  name: Cosine Recall@3
262
  - type: cosine_recall@5
263
- value: 0.21322843822843823
264
  name: Cosine Recall@5
265
  - type: cosine_recall@10
266
- value: 0.30445687645687647
267
  name: Cosine Recall@10
268
  - type: cosine_ndcg@10
269
- value: 0.261956835808035
270
  name: Cosine Ndcg@10
271
  - type: cosine_mrr@10
272
- value: 0.3361349206349206
273
  name: Cosine Mrr@10
274
  - type: cosine_map@100
275
- value: 0.23084417119066455
276
  name: Cosine Map@100
277
  - task:
278
  type: information-retrieval
@@ -282,49 +282,49 @@ model-index:
282
  type: mteb/AILA_statutes
283
  metrics:
284
  - type: cosine_accuracy@1
285
- value: 0.26
286
  name: Cosine Accuracy@1
287
  - type: cosine_accuracy@3
288
- value: 0.44
289
  name: Cosine Accuracy@3
290
  - type: cosine_accuracy@5
291
- value: 0.54
292
  name: Cosine Accuracy@5
293
  - type: cosine_accuracy@10
294
- value: 0.7
295
  name: Cosine Accuracy@10
296
  - type: cosine_precision@1
297
- value: 0.26
298
  name: Cosine Precision@1
299
  - type: cosine_precision@3
300
- value: 0.16666666666666669
301
  name: Cosine Precision@3
302
  - type: cosine_precision@5
303
- value: 0.14400000000000002
304
  name: Cosine Precision@5
305
  - type: cosine_precision@10
306
- value: 0.10999999999999999
307
  name: Cosine Precision@10
308
  - type: cosine_recall@1
309
- value: 0.071
310
  name: Cosine Recall@1
311
  - type: cosine_recall@3
312
- value: 0.129
313
  name: Cosine Recall@3
314
  - type: cosine_recall@5
315
- value: 0.17700000000000002
316
  name: Cosine Recall@5
317
  - type: cosine_recall@10
318
- value: 0.2643333333333333
319
  name: Cosine Recall@10
320
  - type: cosine_ndcg@10
321
- value: 0.23332317287231785
322
  name: Cosine Ndcg@10
323
  - type: cosine_mrr@10
324
- value: 0.37441269841269836
325
  name: Cosine Mrr@10
326
  - type: cosine_map@100
327
- value: 0.2043241006581302
328
  name: Cosine Map@100
329
  - task:
330
  type: information-retrieval
@@ -334,49 +334,49 @@ model-index:
334
  type: mteb/legalbench_consumer_contracts_qa
335
  metrics:
336
  - type: cosine_accuracy@1
337
- value: 0.45202020202020204
338
  name: Cosine Accuracy@1
339
  - type: cosine_accuracy@3
340
- value: 0.6868686868686869
341
  name: Cosine Accuracy@3
342
  - type: cosine_accuracy@5
343
- value: 0.7878787878787878
344
  name: Cosine Accuracy@5
345
  - type: cosine_accuracy@10
346
- value: 0.8737373737373737
347
  name: Cosine Accuracy@10
348
  - type: cosine_precision@1
349
- value: 0.45202020202020204
350
  name: Cosine Precision@1
351
  - type: cosine_precision@3
352
- value: 0.22895622895622894
353
  name: Cosine Precision@3
354
  - type: cosine_precision@5
355
- value: 0.15757575757575756
356
  name: Cosine Precision@5
357
  - type: cosine_precision@10
358
- value: 0.08737373737373735
359
  name: Cosine Precision@10
360
  - type: cosine_recall@1
361
- value: 0.45202020202020204
362
  name: Cosine Recall@1
363
  - type: cosine_recall@3
364
- value: 0.6868686868686869
365
  name: Cosine Recall@3
366
  - type: cosine_recall@5
367
- value: 0.7878787878787878
368
  name: Cosine Recall@5
369
  - type: cosine_recall@10
370
- value: 0.8737373737373737
371
  name: Cosine Recall@10
372
  - type: cosine_ndcg@10
373
- value: 0.660855212722782
374
  name: Cosine Ndcg@10
375
  - type: cosine_mrr@10
376
- value: 0.5928561407728073
377
  name: Cosine Mrr@10
378
  - type: cosine_map@100
379
- value: 0.5987644318492056
380
  name: Cosine Map@100
381
  - task:
382
  type: information-retrieval
@@ -386,49 +386,49 @@ model-index:
386
  type: mteb/legalbench_corporate_lobbying
387
  metrics:
388
  - type: cosine_accuracy@1
389
- value: 0.7705882352941177
390
  name: Cosine Accuracy@1
391
  - type: cosine_accuracy@3
392
- value: 0.9088235294117647
393
  name: Cosine Accuracy@3
394
  - type: cosine_accuracy@5
395
- value: 0.9382352941176471
396
  name: Cosine Accuracy@5
397
  - type: cosine_accuracy@10
398
- value: 0.9705882352941176
399
  name: Cosine Accuracy@10
400
  - type: cosine_precision@1
401
- value: 0.7705882352941177
402
  name: Cosine Precision@1
403
  - type: cosine_precision@3
404
- value: 0.3029411764705882
405
  name: Cosine Precision@3
406
  - type: cosine_precision@5
407
- value: 0.18764705882352936
408
  name: Cosine Precision@5
409
  - type: cosine_precision@10
410
- value: 0.09705882352941174
411
  name: Cosine Precision@10
412
  - type: cosine_recall@1
413
- value: 0.7705882352941177
414
  name: Cosine Recall@1
415
  - type: cosine_recall@3
416
- value: 0.9088235294117647
417
  name: Cosine Recall@3
418
  - type: cosine_recall@5
419
- value: 0.9382352941176471
420
  name: Cosine Recall@5
421
  - type: cosine_recall@10
422
- value: 0.9705882352941176
423
  name: Cosine Recall@10
424
  - type: cosine_ndcg@10
425
- value: 0.877258980240739
426
  name: Cosine Ndcg@10
427
  - type: cosine_mrr@10
428
- value: 0.8466806722689075
429
  name: Cosine Mrr@10
430
  - type: cosine_map@100
431
- value: 0.8476651359451062
432
  name: Cosine Map@100
433
  - task:
434
  type: information-retrieval
@@ -438,63 +438,63 @@ model-index:
438
  type: mteb/legal_summarization
439
  metrics:
440
  - type: cosine_accuracy@1
441
- value: 0.4894366197183099
442
  name: Cosine Accuracy@1
443
  - type: cosine_accuracy@3
444
- value: 0.6408450704225352
445
  name: Cosine Accuracy@3
446
  - type: cosine_accuracy@5
447
- value: 0.7147887323943662
448
  name: Cosine Accuracy@5
449
  - type: cosine_accuracy@10
450
- value: 0.7816901408450704
451
  name: Cosine Accuracy@10
452
  - type: cosine_precision@1
453
- value: 0.4894366197183099
454
  name: Cosine Precision@1
455
  - type: cosine_precision@3
456
- value: 0.23591549295774647
457
  name: Cosine Precision@3
458
  - type: cosine_precision@5
459
- value: 0.16619718309859152
460
  name: Cosine Precision@5
461
  - type: cosine_precision@10
462
- value: 0.09753521126760564
463
  name: Cosine Precision@10
464
  - type: cosine_recall@1
465
- value: 0.4368868514114993
466
  name: Cosine Recall@1
467
  - type: cosine_recall@3
468
- value: 0.5753959362234009
469
  name: Cosine Recall@3
470
  - type: cosine_recall@5
471
- value: 0.6440091305408207
472
  name: Cosine Recall@5
473
  - type: cosine_recall@10
474
- value: 0.7159090909090909
475
  name: Cosine Recall@10
476
  - type: cosine_ndcg@10
477
- value: 0.596027060399293
478
  name: Cosine Ndcg@10
479
  - type: cosine_mrr@10
480
- value: 0.5833137715179968
481
  name: Cosine Mrr@10
482
  - type: cosine_map@100
483
- value: 0.5567992166327345
484
  name: Cosine Map@100
485
  ---
486
 
487
- # SentenceTransformer based on sentence-transformers/all-MiniLM-L6-v2
488
 
489
- This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) on the [coliee](https://huggingface.co/datasets/sentence-transformers/coliee), [legal_qa](https://huggingface.co/datasets/bwang0911/legal_qa_v1), [law_stack](https://huggingface.co/datasets/bwang0911/law_stackexchange), [legal_lens](https://huggingface.co/datasets/bwang0911/legal_lens_nli), [cuad_qa](https://huggingface.co/datasets/bwang0911/cuad_qa), [privacy_qa](https://huggingface.co/datasets/bwang0911/privacy_qa), [legal_sum](https://huggingface.co/datasets/bwang0911/legal_case_summarization) and [aus_legal_qa](https://huggingface.co/datasets/bwang0911/aus_legal_qa) datasets. It maps sentences & paragraphs to a 384-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
490
 
491
  ## Model Details
492
 
493
  ### Model Description
494
  - **Model Type:** Sentence Transformer
495
- - **Base model:** [sentence-transformers/all-MiniLM-L6-v2](https://huggingface.co/sentence-transformers/all-MiniLM-L6-v2) <!-- at revision fa97f6e7cb1a59073dff9e6b13e2715cf7475ac9 -->
496
- - **Maximum Sequence Length:** 256 tokens
497
- - **Output Dimensionality:** 384 dimensions
498
  - **Similarity Function:** Cosine Similarity
499
  - **Training Datasets:**
500
  - [coliee](https://huggingface.co/datasets/sentence-transformers/coliee)
@@ -518,8 +518,8 @@ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [s
518
 
519
  ```
520
  SentenceTransformer(
521
- (0): Transformer({'max_seq_length': 256, 'do_lower_case': False}) with Transformer model: BertModel
522
- (1): Pooling({'word_embedding_dimension': 384, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
523
  (2): Normalize()
524
  )
525
  ```
@@ -548,7 +548,7 @@ sentences = [
548
  ]
549
  embeddings = model.encode(sentences)
550
  print(embeddings.shape)
551
- # [3, 384]
552
 
553
  # Get the similarity scores for the embeddings
554
  similarities = model.similarity(embeddings, embeddings)
@@ -591,21 +591,21 @@ You can finetune this model on your own dataset.
591
 
592
  | Metric | mteb/AILA_casedocs | mteb/AILA_statutes | mteb/legalbench_consumer_contracts_qa | mteb/legalbench_corporate_lobbying | mteb/legal_summarization |
593
  |:--------------------|:-------------------|:-------------------|:--------------------------------------|:-----------------------------------|:-------------------------|
594
- | cosine_accuracy@1 | 0.26 | 0.26 | 0.452 | 0.7706 | 0.4894 |
595
- | cosine_accuracy@3 | 0.36 | 0.44 | 0.6869 | 0.9088 | 0.6408 |
596
- | cosine_accuracy@5 | 0.38 | 0.54 | 0.7879 | 0.9382 | 0.7148 |
597
- | cosine_accuracy@10 | 0.58 | 0.7 | 0.8737 | 0.9706 | 0.7817 |
598
- | cosine_precision@1 | 0.26 | 0.26 | 0.452 | 0.7706 | 0.4894 |
599
- | cosine_precision@3 | 0.2 | 0.1667 | 0.229 | 0.3029 | 0.2359 |
600
- | cosine_precision@5 | 0.14 | 0.144 | 0.1576 | 0.1876 | 0.1662 |
601
- | cosine_precision@10 | 0.106 | 0.11 | 0.0874 | 0.0971 | 0.0975 |
602
- | cosine_recall@1 | 0.0825 | 0.071 | 0.452 | 0.7706 | 0.4369 |
603
- | cosine_recall@3 | 0.184 | 0.129 | 0.6869 | 0.9088 | 0.5754 |
604
- | cosine_recall@5 | 0.2132 | 0.177 | 0.7879 | 0.9382 | 0.644 |
605
- | cosine_recall@10 | 0.3045 | 0.2643 | 0.8737 | 0.9706 | 0.7159 |
606
- | **cosine_ndcg@10** | **0.262** | **0.2333** | **0.6609** | **0.8773** | **0.596** |
607
- | cosine_mrr@10 | 0.3361 | 0.3744 | 0.5929 | 0.8467 | 0.5833 |
608
- | cosine_map@100 | 0.2308 | 0.2043 | 0.5988 | 0.8477 | 0.5568 |
609
 
610
  <!--
611
  ## Bias, Risks and Limitations
@@ -632,7 +632,7 @@ You can finetune this model on your own dataset.
632
  | | anchor | positive | negative |
633
  |:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
634
  | type | string | string | string |
635
- | details | <ul><li>min: 11 tokens</li><li>mean: 41.76 tokens</li><li>max: 99 tokens</li></ul> | <ul><li>min: 25 tokens</li><li>mean: 131.1 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 14 tokens</li><li>mean: 121.07 tokens</li><li>max: 256 tokens</li></ul> |
636
  * Samples:
637
  | anchor | positive | negative |
638
  |:-------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -653,10 +653,10 @@ You can finetune this model on your own dataset.
653
  * Size: 3,742 training samples
654
  * Columns: <code>anchor</code> and <code>positive</code>
655
  * Approximate statistics based on the first 1000 samples:
656
- | | anchor | positive |
657
- |:--------|:------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
658
- | type | string | string |
659
- | details | <ul><li>min: 13 tokens</li><li>mean: 116.0 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 147.66 tokens</li><li>max: 256 tokens</li></ul> |
660
  * Samples:
661
  | anchor | positive |
662
  |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -680,7 +680,7 @@ You can finetune this model on your own dataset.
680
  | | anchor | positive |
681
  |:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
682
  | type | string | string |
683
- | details | <ul><li>min: 27 tokens</li><li>mean: 161.25 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 203.26 tokens</li><li>max: 256 tokens</li></ul> |
684
  * Samples:
685
  | anchor | positive |
686
  |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------|
@@ -701,10 +701,10 @@ You can finetune this model on your own dataset.
701
  * Size: 107 training samples
702
  * Columns: <code>anchor</code> and <code>positive</code>
703
  * Approximate statistics based on the first 107 samples:
704
- | | anchor | positive |
705
- |:--------|:--------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
706
- | type | string | string |
707
- | details | <ul><li>min: 107 tokens</li><li>mean: 168.68 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 27 tokens</li><li>mean: 82.2 tokens</li><li>max: 256 tokens</li></ul> |
708
  * Samples:
709
  | anchor | positive |
710
  |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -725,10 +725,10 @@ You can finetune this model on your own dataset.
725
  * Size: 11,180 training samples
726
  * Columns: <code>anchor</code> and <code>positive</code>
727
  * Approximate statistics based on the first 1000 samples:
728
- | | anchor | positive |
729
- |:--------|:------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------|
730
- | type | string | string |
731
- | details | <ul><li>min: 33 tokens</li><li>mean: 51.31 tokens</li><li>max: 105 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 58.93 tokens</li><li>max: 256 tokens</li></ul> |
732
  * Samples:
733
  | anchor | positive |
734
  |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------|
@@ -776,7 +776,7 @@ You can finetune this model on your own dataset.
776
  | | anchor | positive |
777
  |:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
778
  | type | string | string |
779
- | details | <ul><li>min: 256 tokens</li><li>mean: 256.0 tokens</li><li>max: 256 tokens</li></ul> | <ul><li>min: 63 tokens</li><li>mean: 253.19 tokens</li><li>max: 256 tokens</li></ul> |
780
  * Samples:
781
  | anchor | positive |
782
  |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------|
@@ -800,7 +800,7 @@ You can finetune this model on your own dataset.
800
  | | anchor | positive |
801
  |:--------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
802
  | type | string | string |
803
- | details | <ul><li>min: 12 tokens</li><li>mean: 38.68 tokens</li><li>max: 106 tokens</li></ul> | <ul><li>min: 21 tokens</li><li>mean: 114.45 tokens</li><li>max: 256 tokens</li></ul> |
804
  * Samples:
805
  | anchor | positive |
806
  |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
@@ -819,7 +819,7 @@ You can finetune this model on your own dataset.
819
  #### Non-Default Hyperparameters
820
 
821
  - `eval_strategy`: steps
822
- - `per_device_train_batch_size`: 256
823
  - `learning_rate`: 5e-06
824
  - `num_train_epochs`: 2
825
  - `warmup_ratio`: 0.1
@@ -833,7 +833,7 @@ You can finetune this model on your own dataset.
833
  - `do_predict`: False
834
  - `eval_strategy`: steps
835
  - `prediction_loss_only`: True
836
- - `per_device_train_batch_size`: 256
837
  - `per_device_eval_batch_size`: 8
838
  - `per_gpu_train_batch_size`: None
839
  - `per_gpu_eval_batch_size`: None
@@ -946,13 +946,25 @@ You can finetune this model on your own dataset.
946
  </details>
947
 
948
  ### Training Logs
949
- | Epoch | Step | mteb/AILA_casedocs_cosine_ndcg@10 | mteb/AILA_statutes_cosine_ndcg@10 | mteb/legalbench_consumer_contracts_qa_cosine_ndcg@10 | mteb/legalbench_corporate_lobbying_cosine_ndcg@10 | mteb/legal_summarization_cosine_ndcg@10 |
950
- |:------:|:----:|:---------------------------------:|:---------------------------------:|:----------------------------------------------------:|:-------------------------------------------------:|:---------------------------------------:|
951
- | 0 | 0 | 0.1972 | 0.2052 | 0.6560 | 0.8641 | 0.5900 |
952
- | 0.4717 | 100 | 0.2409 | 0.2173 | 0.6624 | 0.8766 | 0.6055 |
953
- | 0.9434 | 200 | 0.2489 | 0.2207 | 0.6553 | 0.8725 | 0.5998 |
954
- | 1.4151 | 300 | 0.2619 | 0.2355 | 0.6641 | 0.8790 | 0.5992 |
955
- | 1.8868 | 400 | 0.2620 | 0.2333 | 0.6609 | 0.8773 | 0.5960 |
 
 
 
 
 
 
 
 
 
 
 
 
956
 
957
 
958
  ### Framework Versions
 
8
  - generated_from_trainer
9
  - dataset_size:53224
10
  - loss:MultipleNegativesRankingLoss
11
+ base_model: sentence-transformers/all-mpnet-base-v2
12
  widget:
13
  - source_sentence: ' A juridical person may not be a partner of a civil law union. '
14
  sentences:
 
220
  - cosine_mrr@10
221
  - cosine_map@100
222
  model-index:
223
+ - name: SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
224
  results:
225
  - task:
226
  type: information-retrieval
 
230
  type: mteb/AILA_casedocs
231
  metrics:
232
  - type: cosine_accuracy@1
233
+ value: 0.24
234
  name: Cosine Accuracy@1
235
  - type: cosine_accuracy@3
236
+ value: 0.34
237
  name: Cosine Accuracy@3
238
  - type: cosine_accuracy@5
239
+ value: 0.4
240
  name: Cosine Accuracy@5
241
  - type: cosine_accuracy@10
242
+ value: 0.52
243
  name: Cosine Accuracy@10
244
  - type: cosine_precision@1
245
+ value: 0.24
246
  name: Cosine Precision@1
247
  - type: cosine_precision@3
248
+ value: 0.16666666666666663
249
  name: Cosine Precision@3
250
  - type: cosine_precision@5
251
+ value: 0.136
252
  name: Cosine Precision@5
253
  - type: cosine_precision@10
254
+ value: 0.094
255
  name: Cosine Precision@10
256
  - type: cosine_recall@1
257
+ value: 0.06678088578088578
258
  name: Cosine Recall@1
259
  - type: cosine_recall@3
260
+ value: 0.1388193473193473
261
  name: Cosine Recall@3
262
  - type: cosine_recall@5
263
+ value: 0.18372843822843823
264
  name: Cosine Recall@5
265
  - type: cosine_recall@10
266
+ value: 0.2667284382284382
267
  name: Cosine Recall@10
268
  - type: cosine_ndcg@10
269
+ value: 0.22218705752805715
270
  name: Cosine Ndcg@10
271
  - type: cosine_mrr@10
272
+ value: 0.3134126984126984
273
  name: Cosine Mrr@10
274
  - type: cosine_map@100
275
+ value: 0.18539536890113958
276
  name: Cosine Map@100
277
  - task:
278
  type: information-retrieval
 
282
  type: mteb/AILA_statutes
283
  metrics:
284
  - type: cosine_accuracy@1
285
+ value: 0.28
286
  name: Cosine Accuracy@1
287
  - type: cosine_accuracy@3
288
+ value: 0.58
289
  name: Cosine Accuracy@3
290
  - type: cosine_accuracy@5
291
+ value: 0.8
292
  name: Cosine Accuracy@5
293
  - type: cosine_accuracy@10
294
+ value: 0.9
295
  name: Cosine Accuracy@10
296
  - type: cosine_precision@1
297
+ value: 0.28
298
  name: Cosine Precision@1
299
  - type: cosine_precision@3
300
+ value: 0.22666666666666668
301
  name: Cosine Precision@3
302
  - type: cosine_precision@5
303
+ value: 0.22399999999999998
304
  name: Cosine Precision@5
305
  - type: cosine_precision@10
306
+ value: 0.15799999999999997
307
  name: Cosine Precision@10
308
  - type: cosine_recall@1
309
+ value: 0.073
310
  name: Cosine Recall@1
311
  - type: cosine_recall@3
312
+ value: 0.17266666666666666
313
  name: Cosine Recall@3
314
  - type: cosine_recall@5
315
+ value: 0.2763333333333334
316
  name: Cosine Recall@5
317
  - type: cosine_recall@10
318
+ value: 0.3773333333333333
319
  name: Cosine Recall@10
320
  - type: cosine_ndcg@10
321
+ value: 0.32396168684748544
322
  name: Cosine Ndcg@10
323
  - type: cosine_mrr@10
324
+ value: 0.48524603174603165
325
  name: Cosine Mrr@10
326
  - type: cosine_map@100
327
+ value: 0.26147750527977026
328
  name: Cosine Map@100
329
  - task:
330
  type: information-retrieval
 
334
  type: mteb/legalbench_consumer_contracts_qa
335
  metrics:
336
  - type: cosine_accuracy@1
337
+ value: 0.4292929292929293
338
  name: Cosine Accuracy@1
339
  - type: cosine_accuracy@3
340
+ value: 0.6363636363636364
341
  name: Cosine Accuracy@3
342
  - type: cosine_accuracy@5
343
+ value: 0.7095959595959596
344
  name: Cosine Accuracy@5
345
  - type: cosine_accuracy@10
346
+ value: 0.8156565656565656
347
  name: Cosine Accuracy@10
348
  - type: cosine_precision@1
349
+ value: 0.4292929292929293
350
  name: Cosine Precision@1
351
  - type: cosine_precision@3
352
+ value: 0.21212121212121207
353
  name: Cosine Precision@3
354
  - type: cosine_precision@5
355
+ value: 0.1419191919191919
356
  name: Cosine Precision@5
357
  - type: cosine_precision@10
358
+ value: 0.08156565656565656
359
  name: Cosine Precision@10
360
  - type: cosine_recall@1
361
+ value: 0.4292929292929293
362
  name: Cosine Recall@1
363
  - type: cosine_recall@3
364
+ value: 0.6363636363636364
365
  name: Cosine Recall@3
366
  - type: cosine_recall@5
367
+ value: 0.7095959595959596
368
  name: Cosine Recall@5
369
  - type: cosine_recall@10
370
+ value: 0.8156565656565656
371
  name: Cosine Recall@10
372
  - type: cosine_ndcg@10
373
+ value: 0.6114603730669577
374
  name: Cosine Ndcg@10
375
  - type: cosine_mrr@10
376
+ value: 0.5472532868366202
377
  name: Cosine Mrr@10
378
  - type: cosine_map@100
379
+ value: 0.555387361338846
380
  name: Cosine Map@100
381
  - task:
382
  type: information-retrieval
 
386
  type: mteb/legalbench_corporate_lobbying
387
  metrics:
388
  - type: cosine_accuracy@1
389
+ value: 0.6441176470588236
390
  name: Cosine Accuracy@1
391
  - type: cosine_accuracy@3
392
+ value: 0.8558823529411764
393
  name: Cosine Accuracy@3
394
  - type: cosine_accuracy@5
395
+ value: 0.8823529411764706
396
  name: Cosine Accuracy@5
397
  - type: cosine_accuracy@10
398
+ value: 0.9147058823529411
399
  name: Cosine Accuracy@10
400
  - type: cosine_precision@1
401
+ value: 0.6441176470588236
402
  name: Cosine Precision@1
403
  - type: cosine_precision@3
404
+ value: 0.2852941176470588
405
  name: Cosine Precision@3
406
  - type: cosine_precision@5
407
+ value: 0.17647058823529413
408
  name: Cosine Precision@5
409
  - type: cosine_precision@10
410
+ value: 0.09147058823529411
411
  name: Cosine Precision@10
412
  - type: cosine_recall@1
413
+ value: 0.6441176470588236
414
  name: Cosine Recall@1
415
  - type: cosine_recall@3
416
+ value: 0.8558823529411764
417
  name: Cosine Recall@3
418
  - type: cosine_recall@5
419
+ value: 0.8823529411764706
420
  name: Cosine Recall@5
421
  - type: cosine_recall@10
422
+ value: 0.9147058823529411
423
  name: Cosine Recall@10
424
  - type: cosine_ndcg@10
425
+ value: 0.7924078571703878
426
  name: Cosine Ndcg@10
427
  - type: cosine_mrr@10
428
+ value: 0.751936274509804
429
  name: Cosine Mrr@10
430
  - type: cosine_map@100
431
+ value: 0.754712212674935
432
  name: Cosine Map@100
433
  - task:
434
  type: information-retrieval
 
438
  type: mteb/legal_summarization
439
  metrics:
440
  - type: cosine_accuracy@1
441
+ value: 0.41901408450704225
442
  name: Cosine Accuracy@1
443
  - type: cosine_accuracy@3
444
+ value: 0.5563380281690141
445
  name: Cosine Accuracy@3
446
  - type: cosine_accuracy@5
447
+ value: 0.6338028169014085
448
  name: Cosine Accuracy@5
449
  - type: cosine_accuracy@10
450
+ value: 0.7183098591549296
451
  name: Cosine Accuracy@10
452
  - type: cosine_precision@1
453
+ value: 0.41901408450704225
454
  name: Cosine Precision@1
455
  - type: cosine_precision@3
456
+ value: 0.20070422535211266
457
  name: Cosine Precision@3
458
  - type: cosine_precision@5
459
+ value: 0.14295774647887324
460
  name: Cosine Precision@5
461
  - type: cosine_precision@10
462
+ value: 0.08838028169014084
463
  name: Cosine Precision@10
464
  - type: cosine_recall@1
465
+ value: 0.35939538747637334
466
  name: Cosine Recall@1
467
  - type: cosine_recall@3
468
+ value: 0.4814835985610633
469
  name: Cosine Recall@3
470
  - type: cosine_recall@5
471
+ value: 0.5483042192549235
472
  name: Cosine Recall@5
473
  - type: cosine_recall@10
474
+ value: 0.6505441741357234
475
  name: Cosine Recall@10
476
  - type: cosine_ndcg@10
477
+ value: 0.5155518221457815
478
  name: Cosine Ndcg@10
479
  - type: cosine_mrr@10
480
+ value: 0.5074348871003801
481
  name: Cosine Mrr@10
482
  - type: cosine_map@100
483
+ value: 0.46706462134757426
484
  name: Cosine Map@100
485
  ---
486
 
487
+ # SentenceTransformer based on sentence-transformers/all-mpnet-base-v2
488
 
489
+ This is a [sentence-transformers](https://www.SBERT.net) model finetuned from [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) on the [coliee](https://huggingface.co/datasets/sentence-transformers/coliee), [legal_qa](https://huggingface.co/datasets/bwang0911/legal_qa_v1), [law_stack](https://huggingface.co/datasets/bwang0911/law_stackexchange), [legal_lens](https://huggingface.co/datasets/bwang0911/legal_lens_nli), [cuad_qa](https://huggingface.co/datasets/bwang0911/cuad_qa), [privacy_qa](https://huggingface.co/datasets/bwang0911/privacy_qa), [legal_sum](https://huggingface.co/datasets/bwang0911/legal_case_summarization) and [aus_legal_qa](https://huggingface.co/datasets/bwang0911/aus_legal_qa) datasets. It maps sentences & paragraphs to a 768-dimensional dense vector space and can be used for semantic textual similarity, semantic search, paraphrase mining, text classification, clustering, and more.
490
 
491
  ## Model Details
492
 
493
  ### Model Description
494
  - **Model Type:** Sentence Transformer
495
+ - **Base model:** [sentence-transformers/all-mpnet-base-v2](https://huggingface.co/sentence-transformers/all-mpnet-base-v2) <!-- at revision 9a3225965996d404b775526de6dbfe85d3368642 -->
496
+ - **Maximum Sequence Length:** 192 tokens
497
+ - **Output Dimensionality:** 768 dimensions
498
  - **Similarity Function:** Cosine Similarity
499
  - **Training Datasets:**
500
  - [coliee](https://huggingface.co/datasets/sentence-transformers/coliee)
 
518
 
519
  ```
520
  SentenceTransformer(
521
+ (0): Transformer({'max_seq_length': 192, 'do_lower_case': False}) with Transformer model: MPNetModel
522
+ (1): Pooling({'word_embedding_dimension': 768, 'pooling_mode_cls_token': False, 'pooling_mode_mean_tokens': True, 'pooling_mode_max_tokens': False, 'pooling_mode_mean_sqrt_len_tokens': False, 'pooling_mode_weightedmean_tokens': False, 'pooling_mode_lasttoken': False, 'include_prompt': True})
523
  (2): Normalize()
524
  )
525
  ```
 
548
  ]
549
  embeddings = model.encode(sentences)
550
  print(embeddings.shape)
551
+ # [3, 768]
552
 
553
  # Get the similarity scores for the embeddings
554
  similarities = model.similarity(embeddings, embeddings)
 
591
 
592
  | Metric | mteb/AILA_casedocs | mteb/AILA_statutes | mteb/legalbench_consumer_contracts_qa | mteb/legalbench_corporate_lobbying | mteb/legal_summarization |
593
  |:--------------------|:-------------------|:-------------------|:--------------------------------------|:-----------------------------------|:-------------------------|
594
+ | cosine_accuracy@1 | 0.24 | 0.28 | 0.4293 | 0.6441 | 0.419 |
595
+ | cosine_accuracy@3 | 0.34 | 0.58 | 0.6364 | 0.8559 | 0.5563 |
596
+ | cosine_accuracy@5 | 0.4 | 0.8 | 0.7096 | 0.8824 | 0.6338 |
597
+ | cosine_accuracy@10 | 0.52 | 0.9 | 0.8157 | 0.9147 | 0.7183 |
598
+ | cosine_precision@1 | 0.24 | 0.28 | 0.4293 | 0.6441 | 0.419 |
599
+ | cosine_precision@3 | 0.1667 | 0.2267 | 0.2121 | 0.2853 | 0.2007 |
600
+ | cosine_precision@5 | 0.136 | 0.224 | 0.1419 | 0.1765 | 0.143 |
601
+ | cosine_precision@10 | 0.094 | 0.158 | 0.0816 | 0.0915 | 0.0884 |
602
+ | cosine_recall@1 | 0.0668 | 0.073 | 0.4293 | 0.6441 | 0.3594 |
603
+ | cosine_recall@3 | 0.1388 | 0.1727 | 0.6364 | 0.8559 | 0.4815 |
604
+ | cosine_recall@5 | 0.1837 | 0.2763 | 0.7096 | 0.8824 | 0.5483 |
605
+ | cosine_recall@10 | 0.2667 | 0.3773 | 0.8157 | 0.9147 | 0.6505 |
606
+ | **cosine_ndcg@10** | **0.2222** | **0.324** | **0.6115** | **0.7924** | **0.5156** |
607
+ | cosine_mrr@10 | 0.3134 | 0.4852 | 0.5473 | 0.7519 | 0.5074 |
608
+ | cosine_map@100 | 0.1854 | 0.2615 | 0.5554 | 0.7547 | 0.4671 |
609
 
610
  <!--
611
  ## Bias, Risks and Limitations
 
632
  | | anchor | positive | negative |
633
  |:--------|:-----------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
634
  | type | string | string | string |
635
+ | details | <ul><li>min: 11 tokens</li><li>mean: 41.76 tokens</li><li>max: 99 tokens</li></ul> | <ul><li>min: 25 tokens</li><li>mean: 119.1 tokens</li><li>max: 192 tokens</li></ul> | <ul><li>min: 14 tokens</li><li>mean: 113.91 tokens</li><li>max: 192 tokens</li></ul> |
636
  * Samples:
637
  | anchor | positive | negative |
638
  |:-------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
653
  * Size: 3,742 training samples
654
  * Columns: <code>anchor</code> and <code>positive</code>
655
  * Approximate statistics based on the first 1000 samples:
656
+ | | anchor | positive |
657
+ |:--------|:-------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
658
+ | type | string | string |
659
+ | details | <ul><li>min: 13 tokens</li><li>mean: 108.12 tokens</li><li>max: 192 tokens</li></ul> | <ul><li>min: 6 tokens</li><li>mean: 130.94 tokens</li><li>max: 192 tokens</li></ul> |
660
  * Samples:
661
  | anchor | positive |
662
  |:----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
680
  | | anchor | positive |
681
  |:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
682
  | type | string | string |
683
+ | details | <ul><li>min: 27 tokens</li><li>mean: 141.93 tokens</li><li>max: 192 tokens</li></ul> | <ul><li>min: 15 tokens</li><li>mean: 166.18 tokens</li><li>max: 192 tokens</li></ul> |
684
  * Samples:
685
  | anchor | positive |
686
  |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------|
 
701
  * Size: 107 training samples
702
  * Columns: <code>anchor</code> and <code>positive</code>
703
  * Approximate statistics based on the first 107 samples:
704
+ | | anchor | positive |
705
+ |:--------|:--------------------------------------------------------------------------------------|:------------------------------------------------------------------------------------|
706
+ | type | string | string |
707
+ | details | <ul><li>min: 107 tokens</li><li>mean: 164.29 tokens</li><li>max: 192 tokens</li></ul> | <ul><li>min: 27 tokens</li><li>mean: 78.31 tokens</li><li>max: 192 tokens</li></ul> |
708
  * Samples:
709
  | anchor | positive |
710
  |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
725
  * Size: 11,180 training samples
726
  * Columns: <code>anchor</code> and <code>positive</code>
727
  * Approximate statistics based on the first 1000 samples:
728
+ | | anchor | positive |
729
+ |:--------|:------------------------------------------------------------------------------------|:----------------------------------------------------------------------------------|
730
+ | type | string | string |
731
+ | details | <ul><li>min: 33 tokens</li><li>mean: 51.31 tokens</li><li>max: 105 tokens</li></ul> | <ul><li>min: 3 tokens</li><li>mean: 57.1 tokens</li><li>max: 192 tokens</li></ul> |
732
  * Samples:
733
  | anchor | positive |
734
  |:------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:----------------------------------------------|
 
776
  | | anchor | positive |
777
  |:--------|:-------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
778
  | type | string | string |
779
+ | details | <ul><li>min: 192 tokens</li><li>mean: 192.0 tokens</li><li>max: 192 tokens</li></ul> | <ul><li>min: 63 tokens</li><li>mean: 191.26 tokens</li><li>max: 192 tokens</li></ul> |
780
  * Samples:
781
  | anchor | positive |
782
  |:---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------
----------------------------------------------------------------------|
 
800
  | | anchor | positive |
801
  |:--------|:------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------|
802
  | type | string | string |
803
+ | details | <ul><li>min: 12 tokens</li><li>mean: 38.68 tokens</li><li>max: 106 tokens</li></ul> | <ul><li>min: 21 tokens</li><li>mean: 111.75 tokens</li><li>max: 192 tokens</li></ul> |
804
  * Samples:
805
  | anchor | positive |
806
  |:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|:-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------|
 
819
  #### Non-Default Hyperparameters
820
 
821
  - `eval_strategy`: steps
822
+ - `per_device_train_batch_size`: 64
823
  - `learning_rate`: 5e-06
824
  - `num_train_epochs`: 2
825
  - `warmup_ratio`: 0.1
 
833
  - `do_predict`: False
834
  - `eval_strategy`: steps
835
  - `prediction_loss_only`: True
836
+ - `per_device_train_batch_size`: 64
837
  - `per_device_eval_batch_size`: 8
838
  - `per_gpu_train_batch_size`: None
839
  - `per_gpu_eval_batch_size`: None
 
946
  </details>
947
 
948
  ### Training Logs
949
+ | Epoch | Step | Training Loss | mteb/AILA_casedocs_cosine_ndcg@10 | mteb/AILA_statutes_cosine_ndcg@10 | mteb/legalbench_consumer_contracts_qa_cosine_ndcg@10 | mteb/legalbench_corporate_lobbying_cosine_ndcg@10 | mteb/legal_summarization_cosine_ndcg@10 |
950
+ |:------:|:----:|:-------------:|:---------------------------------:|:---------------------------------:|:----------------------------------------------------:|:-------------------------------------------------:|:---------------------------------------:|
951
+ | 0 | 0 | - | 0.1704 | 0.2351 | 0.6781 | 0.8793 | 0.5766 |
952
+ | 0.1196 | 100 | - | 0.2192 | 0.2808 | 0.6816 | 0.8857 | 0.6033 |
953
+ | 0.2392 | 200 | - | 0.2285 | 0.2958 | 0.6637 | 0.8878 | 0.6141 |
954
+ | 0.3589 | 300 | - | 0.2384 | 0.3174 | 0.6504 | 0.8820 | 0.6103 |
955
+ | 0.4785 | 400 | - | 0.2349 | 0.3105 | 0.6379 | 0.8626 | 0.5871 |
956
+ | 0.5981 | 500 | 1.9344 | 0.2223 | 0.3026 | 0.6288 | 0.8476 | 0.5743 |
957
+ | 0.7177 | 600 | - | 0.2155 | 0.3078 | 0.6247 | 0.8277 | 0.5571 |
958
+ | 0.8373 | 700 | - | 0.2179 | 0.3183 | 0.6244 | 0.8389 | 0.5469 |
959
+ | 0.9569 | 800 | - | 0.2145 | 0.3207 | 0.6230 | 0.8368 | 0.5374 |
960
+ | 1.0766 | 900 | - | 0.2045 | 0.3241 | 0.6257 | 0.8331 | 0.5360 |
961
+ | 1.1962 | 1000 | 0.9429 | 0.2162 | 0.3450 | 0.6145 | 0.8216 | 0.5296 |
962
+ | 1.3158 | 1100 | - | 0.2175 | 0.3369 | 0.6149 | 0.8160 | 0.5308 |
963
+ | 1.4354 | 1200 | - | 0.2274 | 0.3246 | 0.6095 | 0.8020 | 0.5262 |
964
+ | 1.5550 | 1300 | - | 0.2217 | 0.3273 | 0.6182 | 0.8030 | 0.5244 |
965
+ | 1.6746 | 1400 | - | 0.2186 | 0.3226 | 0.6145 | 0.7935 | 0.5196 |
966
+ | 1.7943 | 1500 | 0.9098 | 0.2222 | 0.3203 | 0.6129 | 0.7898 | 0.5178 |
967
+ | 1.9139 | 1600 | - | 0.2222 | 0.3240 | 0.6115 | 0.7924 | 0.5156 |
968
 
969
 
970
  ### Framework Versions
config.json CHANGED
@@ -1,26 +1,24 @@
1
  {
2
- "_name_or_path": "sentence-transformers/all-MiniLM-L6-v2",
3
  "architectures": [
4
- "BertModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
- "classifier_dropout": null,
8
- "gradient_checkpointing": false,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
- "hidden_size": 384,
12
  "initializer_range": 0.02,
13
- "intermediate_size": 1536,
14
- "layer_norm_eps": 1e-12,
15
- "max_position_embeddings": 512,
16
- "model_type": "bert",
17
  "num_attention_heads": 12,
18
- "num_hidden_layers": 6,
19
- "pad_token_id": 0,
20
- "position_embedding_type": "absolute",
21
  "torch_dtype": "float32",
22
  "transformers_version": "4.45.2",
23
- "type_vocab_size": 2,
24
- "use_cache": true,
25
- "vocab_size": 30522
26
  }
 
1
  {
2
+ "_name_or_path": "sentence-transformers/all-mpnet-base-v2",
3
  "architectures": [
4
+ "MPNetModel"
5
  ],
6
  "attention_probs_dropout_prob": 0.1,
7
+ "bos_token_id": 0,
8
+ "eos_token_id": 2,
9
  "hidden_act": "gelu",
10
  "hidden_dropout_prob": 0.1,
11
+ "hidden_size": 768,
12
  "initializer_range": 0.02,
13
+ "intermediate_size": 3072,
14
+ "layer_norm_eps": 1e-05,
15
+ "max_position_embeddings": 514,
16
+ "model_type": "mpnet",
17
  "num_attention_heads": 12,
18
+ "num_hidden_layers": 12,
19
+ "pad_token_id": 1,
20
+ "relative_attention_num_buckets": 32,
21
  "torch_dtype": "float32",
22
  "transformers_version": "4.45.2",
23
+ "vocab_size": 30527
 
 
24
  }
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:377c6c0f2f16b01e67bae8bf3d7ec4fa58444f4f55ca3465753fcaf6722e4bd4
3
- size 90864192
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6d829c685c2a71831ec9bcba3b2fb9e107b46825a36f9eed6f4a5d0c91fb174
3
+ size 437967672
sentence_bert_config.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "max_seq_length": 256,
3
  "do_lower_case": false
4
  }
 
1
  {
2
+ "max_seq_length": 192,
3
  "do_lower_case": false
4
  }
special_tokens_map.json CHANGED
@@ -1,27 +1,41 @@
1
  {
 
 
 
 
 
 
 
2
  "cls_token": {
3
- "content": "[CLS]",
4
  "lstrip": false,
5
  "normalized": false,
6
  "rstrip": false,
7
  "single_word": false
8
  },
9
- "mask_token": {
10
- "content": "[MASK]",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
 
 
 
 
 
 
 
16
  "pad_token": {
17
- "content": "[PAD]",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
  "sep_token": {
24
- "content": "[SEP]",
25
  "lstrip": false,
26
  "normalized": false,
27
  "rstrip": false,
 
1
  {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
  "cls_token": {
10
+ "content": "<s>",
11
  "lstrip": false,
12
  "normalized": false,
13
  "rstrip": false,
14
  "single_word": false
15
  },
16
+ "eos_token": {
17
+ "content": "</s>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
21
  "single_word": false
22
  },
23
+ "mask_token": {
24
+ "content": "<mask>",
25
+ "lstrip": true,
26
+ "normalized": false,
27
+ "rstrip": false,
28
+ "single_word": false
29
+ },
30
  "pad_token": {
31
+ "content": "<pad>",
32
  "lstrip": false,
33
  "normalized": false,
34
  "rstrip": false,
35
  "single_word": false
36
  },
37
  "sep_token": {
38
+ "content": "</s>",
39
  "lstrip": false,
40
  "normalized": false,
41
  "rstrip": false,
tokenizer.json CHANGED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json CHANGED
@@ -1,63 +1,71 @@
1
  {
2
  "added_tokens_decoder": {
3
  "0": {
4
- "content": "[PAD]",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false,
9
  "special": true
10
  },
11
- "100": {
12
- "content": "[UNK]",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false,
17
  "special": true
18
  },
19
- "101": {
20
- "content": "[CLS]",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
  },
27
- "102": {
28
- "content": "[SEP]",
29
  "lstrip": false,
30
- "normalized": false,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
- "103": {
36
- "content": "[MASK]",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
 
 
 
 
 
 
 
 
42
  }
43
  },
 
44
  "clean_up_tokenization_spaces": false,
45
- "cls_token": "[CLS]",
46
- "do_basic_tokenize": true,
47
  "do_lower_case": true,
48
- "mask_token": "[MASK]",
 
49
  "max_length": 128,
50
- "model_max_length": 256,
51
- "never_split": null,
52
  "pad_to_multiple_of": null,
53
- "pad_token": "[PAD]",
54
  "pad_token_type_id": 0,
55
  "padding_side": "right",
56
- "sep_token": "[SEP]",
57
  "stride": 0,
58
  "strip_accents": null,
59
  "tokenize_chinese_chars": true,
60
- "tokenizer_class": "BertTokenizer",
61
  "truncation_side": "right",
62
  "truncation_strategy": "longest_first",
63
  "unk_token": "[UNK]"
 
1
  {
2
  "added_tokens_decoder": {
3
  "0": {
4
+ "content": "<s>",
5
  "lstrip": false,
6
  "normalized": false,
7
  "rstrip": false,
8
  "single_word": false,
9
  "special": true
10
  },
11
+ "1": {
12
+ "content": "<pad>",
13
  "lstrip": false,
14
  "normalized": false,
15
  "rstrip": false,
16
  "single_word": false,
17
  "special": true
18
  },
19
+ "2": {
20
+ "content": "</s>",
21
  "lstrip": false,
22
  "normalized": false,
23
  "rstrip": false,
24
  "single_word": false,
25
  "special": true
26
  },
27
+ "3": {
28
+ "content": "<unk>",
29
  "lstrip": false,
30
+ "normalized": true,
31
  "rstrip": false,
32
  "single_word": false,
33
  "special": true
34
  },
35
+ "104": {
36
+ "content": "[UNK]",
37
  "lstrip": false,
38
  "normalized": false,
39
  "rstrip": false,
40
  "single_word": false,
41
  "special": true
42
+ },
43
+ "30526": {
44
+ "content": "<mask>",
45
+ "lstrip": true,
46
+ "normalized": false,
47
+ "rstrip": false,
48
+ "single_word": false,
49
+ "special": true
50
  }
51
  },
52
+ "bos_token": "<s>",
53
  "clean_up_tokenization_spaces": false,
54
+ "cls_token": "<s>",
 
55
  "do_lower_case": true,
56
+ "eos_token": "</s>",
57
+ "mask_token": "<mask>",
58
  "max_length": 128,
59
+ "model_max_length": 384,
 
60
  "pad_to_multiple_of": null,
61
+ "pad_token": "<pad>",
62
  "pad_token_type_id": 0,
63
  "padding_side": "right",
64
+ "sep_token": "</s>",
65
  "stride": 0,
66
  "strip_accents": null,
67
  "tokenize_chinese_chars": true,
68
+ "tokenizer_class": "MPNetTokenizer",
69
  "truncation_side": "right",
70
  "truncation_strategy": "longest_first",
71
  "unk_token": "[UNK]"
vocab.txt CHANGED
@@ -1,3 +1,7 @@
 
 
 
 
1
  [PAD]
2
  [unused0]
3
  [unused1]
@@ -30520,3 +30524,4 @@ necessitated
30520
  ##:
30521
  ##?
30522
  ##~
 
 
1
+ <s>
2
+ <pad>
3
+ </s>
4
+ <unk>
5
  [PAD]
6
  [unused0]
7
  [unused1]
 
30524
  ##:
30525
  ##?
30526
  ##~
30527
+ <mask>