sileod committed on
Commit
9ba3d26
1 Parent(s): 3b0d1a4

Add new SentenceTransformer model

Browse files
README.md CHANGED
The diff for this file is too large to render. See raw diff
 
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "tasksource/ModernBERT-base-nli",
3
  "architectures": [
4
  "ModernBertModel"
5
  ],
@@ -10,116 +10,6 @@
10
  "classifier_bias": false,
11
  "classifier_dropout": 0.0,
12
  "classifier_pooling": "mean",
13
- "classifiers_size": [
14
- 3,
15
- 2,
16
- 2,
17
- 2,
18
- 2,
19
- 3,
20
- 3,
21
- 3,
22
- 3,
23
- 3,
24
- 3,
25
- 3,
26
- 2,
27
- 2,
28
- 3,
29
- 2,
30
- 2,
31
- 2,
32
- 2,
33
- 2,
34
- 6,
35
- 2,
36
- 2,
37
- 2,
38
- 2,
39
- 2,
40
- 3,
41
- 3,
42
- 3,
43
- 3,
44
- 3,
45
- 3,
46
- 3,
47
- 2,
48
- 2,
49
- 2,
50
- 2,
51
- 3,
52
- 3,
53
- 3,
54
- 3,
55
- 3,
56
- 3,
57
- 3,
58
- 3,
59
- 2,
60
- 2,
61
- 2,
62
- 2,
63
- 3,
64
- 2,
65
- 4,
66
- 3,
67
- 3,
68
- 2,
69
- 2,
70
- 2,
71
- 2,
72
- 2,
73
- 3,
74
- 2,
75
- 3,
76
- 2,
77
- 4,
78
- 3,
79
- 3,
80
- 3,
81
- 2,
82
- 3,
83
- 1,
84
- 2,
85
- 2,
86
- 3,
87
- 13,
88
- 2,
89
- 3,
90
- 2,
91
- 2,
92
- 3,
93
- 3,
94
- 2,
95
- 3,
96
- 3,
97
- 2,
98
- 3,
99
- 2,
100
- 2,
101
- 2,
102
- 2,
103
- 2,
104
- 3,
105
- 4,
106
- 3,
107
- 3,
108
- 2,
109
- 2,
110
- 3,
111
- 3,
112
- 2,
113
- 2,
114
- 2,
115
- 2,
116
- 2,
117
- 4,
118
- 3,
119
- 2,
120
- 2,
121
- 3
122
- ],
123
  "cls_token_id": 50281,
124
  "decoder_bias": true,
125
  "deterministic_flash_attn": false,
@@ -130,23 +20,13 @@
130
  "gradient_checkpointing": false,
131
  "hidden_activation": "gelu",
132
  "hidden_size": 768,
133
- "id2label": {
134
- "0": "entailment",
135
- "1": "neutral",
136
- "2": "contradiction"
137
- },
138
  "initializer_cutoff_factor": 2.0,
139
  "initializer_range": 0.02,
140
  "intermediate_size": 1152,
141
- "label2id": {
142
- "contradiction": 2,
143
- "entailment": 0,
144
- "neutral": 1
145
- },
146
  "layer_norm_eps": 1e-05,
147
  "local_attention": 128,
148
  "local_rope_theta": 10000.0,
149
- "max_position_embeddings": 2048,
150
  "mlp_bias": false,
151
  "mlp_dropout": 0.0,
152
  "model_type": "modernbert",
@@ -156,121 +36,10 @@
156
  "num_hidden_layers": 22,
157
  "pad_token_id": 50283,
158
  "position_embedding_type": "absolute",
159
- "problem_type": "single_label_classification",
160
  "reference_compile": true,
161
  "sep_token_id": 50282,
162
  "sparse_pred_ignore_index": -100,
163
  "sparse_prediction": false,
164
- "tasks": [
165
- "glue/mnli",
166
- "glue/qnli",
167
- "glue/rte",
168
- "glue/wnli",
169
- "super_glue/boolq",
170
- "super_glue/cb",
171
- "anli/a1",
172
- "anli/a2",
173
- "anli/a3",
174
- "sick/label",
175
- "sick/entailment_AB",
176
- "snli",
177
- "scitail/snli_format",
178
- "hans",
179
- "WANLI",
180
- "recast/recast_sentiment",
181
- "recast/recast_verbcorner",
182
- "recast/recast_ner",
183
- "recast/recast_factuality",
184
- "recast/recast_puns",
185
- "recast/recast_kg_relations",
186
- "recast/recast_verbnet",
187
- "recast/recast_megaveridicality",
188
- "probability_words_nli/usnli",
189
- "probability_words_nli/reasoning_1hop",
190
- "probability_words_nli/reasoning_2hop",
191
- "nan-nli",
192
- "nli_fever",
193
- "breaking_nli",
194
- "conj_nli",
195
- "fracas",
196
- "dialogue_nli",
197
- "mpe",
198
- "dnc",
199
- "recast_white/fnplus",
200
- "recast_white/sprl",
201
- "recast_white/dpr",
202
- "robust_nli/IS_CS",
203
- "robust_nli/LI_LI",
204
- "robust_nli/ST_WO",
205
- "robust_nli/PI_SP",
206
- "robust_nli/PI_CD",
207
- "robust_nli/ST_SE",
208
- "robust_nli/ST_NE",
209
- "robust_nli/ST_LM",
210
- "robust_nli_is_sd",
211
- "robust_nli_li_ts",
212
- "add_one_rte",
213
- "cycic_classification",
214
- "lingnli",
215
- "monotonicity-entailment",
216
- "scinli",
217
- "naturallogic",
218
- "syntactic-augmentation-nli",
219
- "autotnli",
220
- "defeasible-nli/atomic",
221
- "defeasible-nli/snli",
222
- "help-nli",
223
- "nli-veridicality-transitivity",
224
- "lonli",
225
- "dadc-limit-nli",
226
- "folio",
227
- "tomi-nli",
228
- "puzzte",
229
- "temporal-nli",
230
- "counterfactually-augmented-snli",
231
- "cnli",
232
- "boolq-natural-perturbations",
233
- "equate",
234
- "chaos-mnli-ambiguity",
235
- "logiqa-2.0-nli",
236
- "mindgames",
237
- "ConTRoL-nli",
238
- "logical-fallacy",
239
- "conceptrules_v2",
240
- "zero-shot-label-nli",
241
- "scone",
242
- "monli",
243
- "SpaceNLI",
244
- "propsegment/nli",
245
- "SDOH-NLI",
246
- "scifact_entailment",
247
- "AdjectiveScaleProbe-nli",
248
- "resnli",
249
- "semantic_fragments_nli",
250
- "dataset_train_nli",
251
- "nlgraph",
252
- "ruletaker",
253
- "PARARULE-Plus",
254
- "logical-entailment",
255
- "nope",
256
- "LogicNLI",
257
- "contract-nli/contractnli_a/seg",
258
- "contract-nli/contractnli_b/full",
259
- "nli4ct_semeval2024",
260
- "biosift-nli",
261
- "SIGA-nli",
262
- "FOL-nli",
263
- "doc-nli",
264
- "mctest-nli",
265
- "natural-language-satisfiability",
266
- "idioms-nli",
267
- "lifecycle-entailment",
268
- "MSciNLI",
269
- "hover-3way/nli",
270
- "seahorse_summarization_evaluation",
271
- "babi_nli",
272
- "gen_debiased_nli"
273
- ],
274
  "torch_dtype": "float32",
275
  "transformers_version": "4.48.0.dev0",
276
  "vocab_size": 50368
 
1
  {
2
+ "_name_or_path": "answerdotai/ModernBERT-base",
3
  "architectures": [
4
  "ModernBertModel"
5
  ],
 
10
  "classifier_bias": false,
11
  "classifier_dropout": 0.0,
12
  "classifier_pooling": "mean",
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
13
  "cls_token_id": 50281,
14
  "decoder_bias": true,
15
  "deterministic_flash_attn": false,
 
20
  "gradient_checkpointing": false,
21
  "hidden_activation": "gelu",
22
  "hidden_size": 768,
 
 
 
 
 
23
  "initializer_cutoff_factor": 2.0,
24
  "initializer_range": 0.02,
25
  "intermediate_size": 1152,
 
 
 
 
 
26
  "layer_norm_eps": 1e-05,
27
  "local_attention": 128,
28
  "local_rope_theta": 10000.0,
29
+ "max_position_embeddings": 8192,
30
  "mlp_bias": false,
31
  "mlp_dropout": 0.0,
32
  "model_type": "modernbert",
 
36
  "num_hidden_layers": 22,
37
  "pad_token_id": 50283,
38
  "position_embedding_type": "absolute",
 
39
  "reference_compile": true,
40
  "sep_token_id": 50282,
41
  "sparse_pred_ignore_index": -100,
42
  "sparse_prediction": false,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43
  "torch_dtype": "float32",
44
  "transformers_version": "4.48.0.dev0",
45
  "vocab_size": 50368
model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4f3e4eaefdf2c3a2062d343e925bad3c10166870ec2854b3733a6381b7f465e8
3
  size 596070136
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b6027ea97411457d92dc3a5048481e4fa6859dbaeda6ac805307229298cecbd9
3
  size 596070136
sentence_bert_config.json CHANGED
@@ -1,4 +1,4 @@
1
  {
2
- "max_seq_length": 2048,
3
  "do_lower_case": false
4
  }
 
1
  {
2
+ "max_seq_length": 8192,
3
  "do_lower_case": false
4
  }
tokenizer.json CHANGED
@@ -2,7 +2,7 @@
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
- "max_length": 2048,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
 
2
  "version": "1.0",
3
  "truncation": {
4
  "direction": "Right",
5
+ "max_length": 8192,
6
  "strategy": "LongestFirst",
7
  "stride": 0
8
  },
tokenizer_config.json CHANGED
@@ -933,20 +933,13 @@
933
  "cls_token": "[CLS]",
934
  "extra_special_tokens": {},
935
  "mask_token": "[MASK]",
936
- "max_length": 2048,
937
  "model_input_names": [
938
  "input_ids",
939
  "attention_mask"
940
  ],
941
  "model_max_length": 1000000000000000019884624838656,
942
- "pad_to_multiple_of": null,
943
  "pad_token": "[PAD]",
944
- "pad_token_type_id": 0,
945
- "padding_side": "right",
946
  "sep_token": "[SEP]",
947
- "stride": 0,
948
  "tokenizer_class": "PreTrainedTokenizerFast",
949
- "truncation_side": "right",
950
- "truncation_strategy": "longest_first",
951
  "unk_token": "[UNK]"
952
  }
 
933
  "cls_token": "[CLS]",
934
  "extra_special_tokens": {},
935
  "mask_token": "[MASK]",
 
936
  "model_input_names": [
937
  "input_ids",
938
  "attention_mask"
939
  ],
940
  "model_max_length": 1000000000000000019884624838656,
 
941
  "pad_token": "[PAD]",
 
 
942
  "sep_token": "[SEP]",
 
943
  "tokenizer_class": "PreTrainedTokenizerFast",
 
 
944
  "unk_token": "[UNK]"
945
  }