{
"architectures": [
"ModernBertForTokenClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_modernbert.ModernBertConfig",
"AutoModel": "modeling_modernbert.ModernBertModel",
"AutoModelForMaskedLM": "modeling_modernbert.ModernBertForMaskedLM",
"AutoModelForSequenceClassification": "modeling_modernbert.ModernBertForSequenceClassification",
"AutoModelForTokenClassification": "modeling_modernbert.ModernBertForTokenClassification"
},
"bos_token_id": 0,
"classifier_activation": "gelu",
"classifier_bias": false,
"classifier_dropout": 0.0,
"classifier_pooling": "mean",
"cls_token_id": 0,
"custom_pipelines": {
"universal-dependencies": {
"impl": "ud.UniversalDependenciesPipeline",
"pt": "AutoModelForTokenClassification"
}
},
"decoder_bias": true,
"deterministic_flash_attn": false,
"embedding_dropout": 0.0,
"eos_token_id": 2,
"global_attn_every_n_layers": 3,
"global_rope_theta": 160000.0,
"gradient_checkpointing": false,
"hidden_activation": "gelu",
"hidden_size": 768,
"id2label": {
"0": "ADJ|o",
"1": "ADJ|o|l-acl",
"2": "ADJ|o|l-advcl",
"3": "ADJ|o|l-amod",
"4": "ADJ|o|l-ccomp",
"5": "ADJ|o|l-csubj",
"6": "ADJ|o|l-csubj:outer",
"7": "ADJ|o|l-nmod",
"8": "ADJ|o|l-nsubj",
"9": "ADJ|o|l-obj",
"10": "ADJ|o|l-obl",
"11": "ADJ|o|r-acl",
"12": "ADJ|o|r-amod",
"13": "ADJ|o|r-dep",
"14": "ADJ|o|root",
"15": "ADJ|x",
"16": "ADJ|x|l-acl",
"17": "ADJ|x|l-advcl",
"18": "ADJ|x|l-amod",
"19": "ADJ|x|l-ccomp",
"20": "ADJ|x|l-csubj",
"21": "ADJ|x|l-csubj:outer",
"22": "ADJ|x|l-nmod",
"23": "ADJ|x|l-nsubj",
"24": "ADJ|x|l-obj",
"25": "ADJ|x|l-obl",
"26": "ADJ|x|r-acl",
"27": "ADJ|x|r-amod",
"28": "ADJ|x|r-dep",
"29": "ADJ|x|root",
"30": "ADP|o",
"31": "ADP|o|l-case",
"32": "ADP|o|r-case",
"33": "ADP|o|r-fixed",
"34": "ADP|x",
"35": "ADP|x|l-case",
"36": "ADP|x|r-case",
"37": "ADP|x|r-fixed",
"38": "ADV|o",
"39": "ADV|o|l-advcl",
"40": "ADV|o|l-advmod",
"41": "ADV|o|l-obj",
"42": "ADV|o|r-dep",
"43": "ADV|o|root",
"44": "ADV|x",
"45": "ADV|x|l-advcl",
"46": "ADV|x|l-advmod",
"47": "ADV|x|l-obj",
"48": "ADV|x|r-dep",
"49": "ADV|x|root",
"50": "AUX|o",
"51": "AUX|o|Polarity=Neg",
"52": "AUX|o|Polarity=Neg|r-aux",
"53": "AUX|o|Polarity=Neg|r-fixed",
"54": "AUX|o|r-aux",
"55": "AUX|o|r-cop",
"56": "AUX|o|r-fixed",
"57": "AUX|o|root",
"58": "AUX|x",
"59": "AUX|x|Polarity=Neg",
"60": "AUX|x|Polarity=Neg|r-aux",
"61": "AUX|x|Polarity=Neg|r-fixed",
"62": "AUX|x|r-aux",
"63": "AUX|x|r-cop",
"64": "AUX|x|r-fixed",
"65": "AUX|x|root",
"66": "CCONJ|o",
"67": "CCONJ|o|l-cc",
"68": "CCONJ|o|r-cc",
"69": "CCONJ|x",
"70": "CCONJ|x|l-cc",
"71": "CCONJ|x|r-cc",
"72": "DET|o",
"73": "DET|o|l-det",
"74": "DET|x",
"75": "DET|x|l-det",
"76": "INTJ|o",
"77": "INTJ|o|l-discourse",
"78": "INTJ|o|r-discourse",
"79": "INTJ|o|root",
"80": "INTJ|x",
"81": "INTJ|x|l-discourse",
"82": "INTJ|x|r-discourse",
"83": "INTJ|x|root",
"84": "NOUN|o",
"85": "NOUN|o|Polarity=Neg",
"86": "NOUN|o|Polarity=Neg|l-obl",
"87": "NOUN|o|Polarity=Neg|root",
"88": "NOUN|o|l-acl",
"89": "NOUN|o|l-advcl",
"90": "NOUN|o|l-ccomp",
"91": "NOUN|o|l-compound",
"92": "NOUN|o|l-csubj",
"93": "NOUN|o|l-csubj:outer",
"94": "NOUN|o|l-nmod",
"95": "NOUN|o|l-nsubj",
"96": "NOUN|o|l-nsubj:outer",
"97": "NOUN|o|l-obj",
"98": "NOUN|o|l-obl",
"99": "NOUN|o|r-compound",
"100": "NOUN|o|r-nmod",
"101": "NOUN|o|r-nsubj",
"102": "NOUN|o|root",
"103": "NOUN|x",
"104": "NOUN|x|Polarity=Neg",
"105": "NOUN|x|Polarity=Neg|l-obl",
"106": "NOUN|x|Polarity=Neg|root",
"107": "NOUN|x|l-acl",
"108": "NOUN|x|l-advcl",
"109": "NOUN|x|l-ccomp",
"110": "NOUN|x|l-compound",
"111": "NOUN|x|l-csubj",
"112": "NOUN|x|l-csubj:outer",
"113": "NOUN|x|l-nmod",
"114": "NOUN|x|l-nsubj",
"115": "NOUN|x|l-nsubj:outer",
"116": "NOUN|x|l-obj",
"117": "NOUN|x|l-obl",
"118": "NOUN|x|r-compound",
"119": "NOUN|x|r-nmod",
"120": "NOUN|x|r-nsubj",
"121": "NOUN|x|root",
"122": "NUM|o",
"123": "NUM|o|l-advcl",
"124": "NUM|o|l-compound",
"125": "NUM|o|l-nmod",
"126": "NUM|o|l-nsubj",
"127": "NUM|o|l-nsubj:outer",
"128": "NUM|o|l-nummod",
"129": "NUM|o|l-obj",
"130": "NUM|o|l-obl",
"131": "NUM|o|r-compound",
"132": "NUM|o|root",
"133": "NUM|x",
"134": "NUM|x|l-advcl",
"135": "NUM|x|l-compound",
"136": "NUM|x|l-nmod",
"137": "NUM|x|l-nsubj",
"138": "NUM|x|l-nsubj:outer",
"139": "NUM|x|l-nummod",
"140": "NUM|x|l-obj",
"141": "NUM|x|l-obl",
"142": "NUM|x|r-compound",
"143": "NUM|x|root",
"144": "PART|o",
"145": "PART|o|l-mark",
"146": "PART|o|r-mark",
"147": "PART|x",
"148": "PART|x|l-mark",
"149": "PART|x|r-mark",
"150": "PRON|o",
"151": "PRON|o|l-acl",
"152": "PRON|o|l-advcl",
"153": "PRON|o|l-nmod",
"154": "PRON|o|l-nsubj",
"155": "PRON|o|l-nsubj:outer",
"156": "PRON|o|l-obj",
"157": "PRON|o|l-obl",
"158": "PRON|o|root",
"159": "PRON|x",
"160": "PRON|x|l-acl",
"161": "PRON|x|l-advcl",
"162": "PRON|x|l-nmod",
"163": "PRON|x|l-nsubj",
"164": "PRON|x|l-nsubj:outer",
"165": "PRON|x|l-obj",
"166": "PRON|x|l-obl",
"167": "PRON|x|root",
"168": "PROPN|o",
"169": "PROPN|o|l-acl",
"170": "PROPN|o|l-advcl",
"171": "PROPN|o|l-compound",
"172": "PROPN|o|l-nmod",
"173": "PROPN|o|l-nsubj",
"174": "PROPN|o|l-nsubj:outer",
"175": "PROPN|o|l-obj",
"176": "PROPN|o|l-obl",
"177": "PROPN|o|r-compound",
"178": "PROPN|o|r-nmod",
"179": "PROPN|o|root",
"180": "PROPN|x",
"181": "PROPN|x|l-acl",
"182": "PROPN|x|l-advcl",
"183": "PROPN|x|l-compound",
"184": "PROPN|x|l-nmod",
"185": "PROPN|x|l-nsubj",
"186": "PROPN|x|l-nsubj:outer",
"187": "PROPN|x|l-obj",
"188": "PROPN|x|l-obl",
"189": "PROPN|x|r-compound",
"190": "PROPN|x|r-nmod",
"191": "PROPN|x|root",
"192": "PUNCT|o",
"193": "PUNCT|o|l-punct",
"194": "PUNCT|o|r-punct",
"195": "PUNCT|x",
"196": "PUNCT|x|l-punct",
"197": "PUNCT|x|r-punct",
"198": "SCONJ|o",
"199": "SCONJ|o|l-dep",
"200": "SCONJ|o|r-fixed",
"201": "SCONJ|o|r-mark",
"202": "SCONJ|x",
"203": "SCONJ|x|l-dep",
"204": "SCONJ|x|r-fixed",
"205": "SCONJ|x|r-mark",
"206": "SYM|o",
"207": "SYM|o|l-compound",
"208": "SYM|o|l-dep",
"209": "SYM|o|l-nmod",
"210": "SYM|o|l-obl",
"211": "SYM|o|r-compound",
"212": "SYM|o|r-dep",
"213": "SYM|x",
"214": "SYM|x|l-compound",
"215": "SYM|x|l-dep",
"216": "SYM|x|l-nmod",
"217": "SYM|x|l-obl",
"218": "SYM|x|r-compound",
"219": "SYM|x|r-dep",
"220": "VERB|o",
"221": "VERB|o|l-acl",
"222": "VERB|o|l-advcl",
"223": "VERB|o|l-ccomp",
"224": "VERB|o|l-compound",
"225": "VERB|o|l-csubj",
"226": "VERB|o|l-csubj:outer",
"227": "VERB|o|l-nmod",
"228": "VERB|o|l-obj",
"229": "VERB|o|l-obl",
"230": "VERB|o|r-acl",
"231": "VERB|o|r-advcl",
"232": "VERB|o|r-compound",
"233": "VERB|o|root",
"234": "VERB|x",
"235": "VERB|x|l-acl",
"236": "VERB|x|l-advcl",
"237": "VERB|x|l-ccomp",
"238": "VERB|x|l-compound",
"239": "VERB|x|l-csubj",
"240": "VERB|x|l-csubj:outer",
"241": "VERB|x|l-nmod",
"242": "VERB|x|l-obj",
"243": "VERB|x|l-obl",
"244": "VERB|x|r-acl",
"245": "VERB|x|r-advcl",
"246": "VERB|x|r-compound",
"247": "VERB|x|root",
"248": "X|o",
"249": "X|o|l-nmod",
"250": "X|o|r-dep",
"251": "X|x",
"252": "X|x|l-nmod",
"253": "X|x|r-dep",
"254": "X|x|r-goeswith"
},
"initializer_cutoff_factor": 2.0,
"initializer_range": 0.02,
"intermediate_size": 1152,
"label2id": {
"ADJ|o": 0,
"ADJ|o|l-acl": 1,
"ADJ|o|l-advcl": 2,
"ADJ|o|l-amod": 3,
"ADJ|o|l-ccomp": 4,
"ADJ|o|l-csubj": 5,
"ADJ|o|l-csubj:outer": 6,
"ADJ|o|l-nmod": 7,
"ADJ|o|l-nsubj": 8,
"ADJ|o|l-obj": 9,
"ADJ|o|l-obl": 10,
"ADJ|o|r-acl": 11,
"ADJ|o|r-amod": 12,
"ADJ|o|r-dep": 13,
"ADJ|o|root": 14,
"ADJ|x": 15,
"ADJ|x|l-acl": 16,
"ADJ|x|l-advcl": 17,
"ADJ|x|l-amod": 18,
"ADJ|x|l-ccomp": 19,
"ADJ|x|l-csubj": 20,
"ADJ|x|l-csubj:outer": 21,
"ADJ|x|l-nmod": 22,
"ADJ|x|l-nsubj": 23,
"ADJ|x|l-obj": 24,
"ADJ|x|l-obl": 25,
"ADJ|x|r-acl": 26,
"ADJ|x|r-amod": 27,
"ADJ|x|r-dep": 28,
"ADJ|x|root": 29,
"ADP|o": 30,
"ADP|o|l-case": 31,
"ADP|o|r-case": 32,
"ADP|o|r-fixed": 33,
"ADP|x": 34,
"ADP|x|l-case": 35,
"ADP|x|r-case": 36,
"ADP|x|r-fixed": 37,
"ADV|o": 38,
"ADV|o|l-advcl": 39,
"ADV|o|l-advmod": 40,
"ADV|o|l-obj": 41,
"ADV|o|r-dep": 42,
"ADV|o|root": 43,
"ADV|x": 44,
"ADV|x|l-advcl": 45,
"ADV|x|l-advmod": 46,
"ADV|x|l-obj": 47,
"ADV|x|r-dep": 48,
"ADV|x|root": 49,
"AUX|o": 50,
"AUX|o|Polarity=Neg": 51,
"AUX|o|Polarity=Neg|r-aux": 52,
"AUX|o|Polarity=Neg|r-fixed": 53,
"AUX|o|r-aux": 54,
"AUX|o|r-cop": 55,
"AUX|o|r-fixed": 56,
"AUX|o|root": 57,
"AUX|x": 58,
"AUX|x|Polarity=Neg": 59,
"AUX|x|Polarity=Neg|r-aux": 60,
"AUX|x|Polarity=Neg|r-fixed": 61,
"AUX|x|r-aux": 62,
"AUX|x|r-cop": 63,
"AUX|x|r-fixed": 64,
"AUX|x|root": 65,
"CCONJ|o": 66,
"CCONJ|o|l-cc": 67,
"CCONJ|o|r-cc": 68,
"CCONJ|x": 69,
"CCONJ|x|l-cc": 70,
"CCONJ|x|r-cc": 71,
"DET|o": 72,
"DET|o|l-det": 73,
"DET|x": 74,
"DET|x|l-det": 75,
"INTJ|o": 76,
"INTJ|o|l-discourse": 77,
"INTJ|o|r-discourse": 78,
"INTJ|o|root": 79,
"INTJ|x": 80,
"INTJ|x|l-discourse": 81,
"INTJ|x|r-discourse": 82,
"INTJ|x|root": 83,
"NOUN|o": 84,
"NOUN|o|Polarity=Neg": 85,
"NOUN|o|Polarity=Neg|l-obl": 86,
"NOUN|o|Polarity=Neg|root": 87,
"NOUN|o|l-acl": 88,
"NOUN|o|l-advcl": 89,
"NOUN|o|l-ccomp": 90,
"NOUN|o|l-compound": 91,
"NOUN|o|l-csubj": 92,
"NOUN|o|l-csubj:outer": 93,
"NOUN|o|l-nmod": 94,
"NOUN|o|l-nsubj": 95,
"NOUN|o|l-nsubj:outer": 96,
"NOUN|o|l-obj": 97,
"NOUN|o|l-obl": 98,
"NOUN|o|r-compound": 99,
"NOUN|o|r-nmod": 100,
"NOUN|o|r-nsubj": 101,
"NOUN|o|root": 102,
"NOUN|x": 103,
"NOUN|x|Polarity=Neg": 104,
"NOUN|x|Polarity=Neg|l-obl": 105,
"NOUN|x|Polarity=Neg|root": 106,
"NOUN|x|l-acl": 107,
"NOUN|x|l-advcl": 108,
"NOUN|x|l-ccomp": 109,
"NOUN|x|l-compound": 110,
"NOUN|x|l-csubj": 111,
"NOUN|x|l-csubj:outer": 112,
"NOUN|x|l-nmod": 113,
"NOUN|x|l-nsubj": 114,
"NOUN|x|l-nsubj:outer": 115,
"NOUN|x|l-obj": 116,
"NOUN|x|l-obl": 117,
"NOUN|x|r-compound": 118,
"NOUN|x|r-nmod": 119,
"NOUN|x|r-nsubj": 120,
"NOUN|x|root": 121,
"NUM|o": 122,
"NUM|o|l-advcl": 123,
"NUM|o|l-compound": 124,
"NUM|o|l-nmod": 125,
"NUM|o|l-nsubj": 126,
"NUM|o|l-nsubj:outer": 127,
"NUM|o|l-nummod": 128,
"NUM|o|l-obj": 129,
"NUM|o|l-obl": 130,
"NUM|o|r-compound": 131,
"NUM|o|root": 132,
"NUM|x": 133,
"NUM|x|l-advcl": 134,
"NUM|x|l-compound": 135,
"NUM|x|l-nmod": 136,
"NUM|x|l-nsubj": 137,
"NUM|x|l-nsubj:outer": 138,
"NUM|x|l-nummod": 139,
"NUM|x|l-obj": 140,
"NUM|x|l-obl": 141,
"NUM|x|r-compound": 142,
"NUM|x|root": 143,
"PART|o": 144,
"PART|o|l-mark": 145,
"PART|o|r-mark": 146,
"PART|x": 147,
"PART|x|l-mark": 148,
"PART|x|r-mark": 149,
"PRON|o": 150,
"PRON|o|l-acl": 151,
"PRON|o|l-advcl": 152,
"PRON|o|l-nmod": 153,
"PRON|o|l-nsubj": 154,
"PRON|o|l-nsubj:outer": 155,
"PRON|o|l-obj": 156,
"PRON|o|l-obl": 157,
"PRON|o|root": 158,
"PRON|x": 159,
"PRON|x|l-acl": 160,
"PRON|x|l-advcl": 161,
"PRON|x|l-nmod": 162,
"PRON|x|l-nsubj": 163,
"PRON|x|l-nsubj:outer": 164,
"PRON|x|l-obj": 165,
"PRON|x|l-obl": 166,
"PRON|x|root": 167,
"PROPN|o": 168,
"PROPN|o|l-acl": 169,
"PROPN|o|l-advcl": 170,
"PROPN|o|l-compound": 171,
"PROPN|o|l-nmod": 172,
"PROPN|o|l-nsubj": 173,
"PROPN|o|l-nsubj:outer": 174,
"PROPN|o|l-obj": 175,
"PROPN|o|l-obl": 176,
"PROPN|o|r-compound": 177,
"PROPN|o|r-nmod": 178,
"PROPN|o|root": 179,
"PROPN|x": 180,
"PROPN|x|l-acl": 181,
"PROPN|x|l-advcl": 182,
"PROPN|x|l-compound": 183,
"PROPN|x|l-nmod": 184,
"PROPN|x|l-nsubj": 185,
"PROPN|x|l-nsubj:outer": 186,
"PROPN|x|l-obj": 187,
"PROPN|x|l-obl": 188,
"PROPN|x|r-compound": 189,
"PROPN|x|r-nmod": 190,
"PROPN|x|root": 191,
"PUNCT|o": 192,
"PUNCT|o|l-punct": 193,
"PUNCT|o|r-punct": 194,
"PUNCT|x": 195,
"PUNCT|x|l-punct": 196,
"PUNCT|x|r-punct": 197,
"SCONJ|o": 198,
"SCONJ|o|l-dep": 199,
"SCONJ|o|r-fixed": 200,
"SCONJ|o|r-mark": 201,
"SCONJ|x": 202,
"SCONJ|x|l-dep": 203,
"SCONJ|x|r-fixed": 204,
"SCONJ|x|r-mark": 205,
"SYM|o": 206,
"SYM|o|l-compound": 207,
"SYM|o|l-dep": 208,
"SYM|o|l-nmod": 209,
"SYM|o|l-obl": 210,
"SYM|o|r-compound": 211,
"SYM|o|r-dep": 212,
"SYM|x": 213,
"SYM|x|l-compound": 214,
"SYM|x|l-dep": 215,
"SYM|x|l-nmod": 216,
"SYM|x|l-obl": 217,
"SYM|x|r-compound": 218,
"SYM|x|r-dep": 219,
"VERB|o": 220,
"VERB|o|l-acl": 221,
"VERB|o|l-advcl": 222,
"VERB|o|l-ccomp": 223,
"VERB|o|l-compound": 224,
"VERB|o|l-csubj": 225,
"VERB|o|l-csubj:outer": 226,
"VERB|o|l-nmod": 227,
"VERB|o|l-obj": 228,
"VERB|o|l-obl": 229,
"VERB|o|r-acl": 230,
"VERB|o|r-advcl": 231,
"VERB|o|r-compound": 232,
"VERB|o|root": 233,
"VERB|x": 234,
"VERB|x|l-acl": 235,
"VERB|x|l-advcl": 236,
"VERB|x|l-ccomp": 237,
"VERB|x|l-compound": 238,
"VERB|x|l-csubj": 239,
"VERB|x|l-csubj:outer": 240,
"VERB|x|l-nmod": 241,
"VERB|x|l-obj": 242,
"VERB|x|l-obl": 243,
"VERB|x|r-acl": 244,
"VERB|x|r-advcl": 245,
"VERB|x|r-compound": 246,
"VERB|x|root": 247,
"X|o": 248,
"X|o|l-nmod": 249,
"X|o|r-dep": 250,
"X|x": 251,
"X|x|l-nmod": 252,
"X|x|r-dep": 253,
"X|x|r-goeswith": 254
},
"layer_norm_eps": 1e-05,
"local_attention": 128,
"local_rope_theta": 10000.0,
"max_position_embeddings": 8192,
"mlp_bias": false,
"mlp_dropout": 0.0,
"model_type": "modernbert",
"norm_bias": false,
"norm_eps": 1e-05,
"num_attention_heads": 12,
"num_hidden_layers": 22,
"pad_token_id": 1,
"position_embedding_type": "absolute",
"reference_compile": true,
"sep_token_id": 2,
"sparse_pred_ignore_index": -100,
"sparse_prediction": false,
"tokenizer_class": "DebertaV2TokenizerFast",
"torch_dtype": "float32",
"transformers_version": "4.47.1",
"vocab_size": 65000
}