{
"architectures": [
"ModernBertForTokenClassification"
],
"attention_bias": false,
"attention_dropout": 0.0,
"auto_map": {
"AutoConfig": "configuration_modernbert.ModernBertConfig",
"AutoModel": "modeling_modernbert.ModernBertModel",
"AutoModelForMaskedLM": "modeling_modernbert.ModernBertForMaskedLM",
"AutoModelForSequenceClassification": "modeling_modernbert.ModernBertForSequenceClassification",
"AutoModelForTokenClassification": "modeling_modernbert.ModernBertForTokenClassification"
},
"bos_token_id": 0,
"classifier_activation": "gelu",
"classifier_bias": false,
"classifier_dropout": 0.0,
"classifier_pooling": "mean",
"cls_token_id": 0,
"custom_pipelines": {
"universal-dependencies": {
"impl": "ud.UniversalDependenciesPipeline",
"pt": "AutoModelForTokenClassification"
}
},
"decoder_bias": true,
"deterministic_flash_attn": false,
"embedding_dropout": 0.0,
"eos_token_id": 2,
"global_attn_every_n_layers": 3,
"global_rope_theta": 160000.0,
"gradient_checkpointing": false,
"hidden_activation": "gelu",
"hidden_size": 1024,
"id2label": {
"0": "ADJ",
"1": "ADJ.",
"2": "ADJ.|[acl]",
"3": "ADJ.|[advcl]",
"4": "ADJ.|[amod]",
"5": "ADJ.|[ccomp]",
"6": "ADJ.|[csubj:outer]",
"7": "ADJ.|[csubj]",
"8": "ADJ.|[dep]",
"9": "ADJ.|[nmod]",
"10": "ADJ.|[nsubj]",
"11": "ADJ.|[obj]",
"12": "ADJ.|[obl]",
"13": "ADJ.|[root]",
"14": "ADJ|[acl]",
"15": "ADJ|[advcl]",
"16": "ADJ|[amod]",
"17": "ADJ|[ccomp]",
"18": "ADJ|[csubj:outer]",
"19": "ADJ|[csubj]",
"20": "ADJ|[dep]",
"21": "ADJ|[nmod]",
"22": "ADJ|[nsubj]",
"23": "ADJ|[obj]",
"24": "ADJ|[obl]",
"25": "ADJ|[root]",
"26": "ADP",
"27": "ADP.",
"28": "ADP.|[case]",
"29": "ADP.|[fixed]",
"30": "ADP|[case]",
"31": "ADP|[fixed]",
"32": "ADV",
"33": "ADV.",
"34": "ADV.|[advcl]",
"35": "ADV.|[advmod]",
"36": "ADV.|[dep]",
"37": "ADV.|[obj]",
"38": "ADV.|[root]",
"39": "ADV|[advcl]",
"40": "ADV|[advmod]",
"41": "ADV|[dep]",
"42": "ADV|[obj]",
"43": "ADV|[root]",
"44": "AUX",
"45": "AUX.",
"46": "AUX.|Polarity=Neg",
"47": "AUX.|Polarity=Neg|[aux]",
"48": "AUX.|Polarity=Neg|[fixed]",
"49": "AUX.|[aux]",
"50": "AUX.|[cop]",
"51": "AUX.|[fixed]",
"52": "AUX.|[root]",
"53": "AUX|Polarity=Neg",
"54": "AUX|Polarity=Neg|[aux]",
"55": "AUX|Polarity=Neg|[fixed]",
"56": "AUX|[aux]",
"57": "AUX|[cop]",
"58": "AUX|[fixed]",
"59": "AUX|[root]",
"60": "CCONJ",
"61": "CCONJ.",
"62": "CCONJ.|[cc]",
"63": "CCONJ|[cc]",
"64": "DET",
"65": "DET.",
"66": "DET.|[det]",
"67": "DET|[det]",
"68": "INTJ",
"69": "INTJ.",
"70": "INTJ.|[discourse]",
"71": "INTJ.|[root]",
"72": "INTJ|[discourse]",
"73": "INTJ|[root]",
"74": "NOUN",
"75": "NOUN.",
"76": "NOUN.|Polarity=Neg",
"77": "NOUN.|Polarity=Neg|[obl]",
"78": "NOUN.|Polarity=Neg|[root]",
"79": "NOUN.|[acl]",
"80": "NOUN.|[advcl]",
"81": "NOUN.|[ccomp]",
"82": "NOUN.|[compound]",
"83": "NOUN.|[csubj:outer]",
"84": "NOUN.|[csubj]",
"85": "NOUN.|[nmod]",
"86": "NOUN.|[nsubj:outer]",
"87": "NOUN.|[nsubj]",
"88": "NOUN.|[obj]",
"89": "NOUN.|[obl]",
"90": "NOUN.|[root]",
"91": "NOUN|Polarity=Neg",
"92": "NOUN|Polarity=Neg|[obl]",
"93": "NOUN|Polarity=Neg|[root]",
"94": "NOUN|[acl]",
"95": "NOUN|[advcl]",
"96": "NOUN|[ccomp]",
"97": "NOUN|[compound]",
"98": "NOUN|[csubj:outer]",
"99": "NOUN|[csubj]",
"100": "NOUN|[nmod]",
"101": "NOUN|[nsubj:outer]",
"102": "NOUN|[nsubj]",
"103": "NOUN|[obj]",
"104": "NOUN|[obl]",
"105": "NOUN|[root]",
"106": "NUM",
"107": "NUM.",
"108": "NUM.|[advcl]",
"109": "NUM.|[compound]",
"110": "NUM.|[nmod]",
"111": "NUM.|[nsubj:outer]",
"112": "NUM.|[nsubj]",
"113": "NUM.|[nummod]",
"114": "NUM.|[obj]",
"115": "NUM.|[obl]",
"116": "NUM.|[root]",
"117": "NUM|[advcl]",
"118": "NUM|[compound]",
"119": "NUM|[nmod]",
"120": "NUM|[nsubj:outer]",
"121": "NUM|[nsubj]",
"122": "NUM|[nummod]",
"123": "NUM|[obj]",
"124": "NUM|[obl]",
"125": "NUM|[root]",
"126": "PART",
"127": "PART.",
"128": "PART.|[mark]",
"129": "PART|[mark]",
"130": "PRON",
"131": "PRON.",
"132": "PRON.|[acl]",
"133": "PRON.|[advcl]",
"134": "PRON.|[nmod]",
"135": "PRON.|[nsubj:outer]",
"136": "PRON.|[nsubj]",
"137": "PRON.|[obj]",
"138": "PRON.|[obl]",
"139": "PRON.|[root]",
"140": "PRON|[acl]",
"141": "PRON|[advcl]",
"142": "PRON|[nmod]",
"143": "PRON|[nsubj:outer]",
"144": "PRON|[nsubj]",
"145": "PRON|[obj]",
"146": "PRON|[obl]",
"147": "PRON|[root]",
"148": "PROPN",
"149": "PROPN.",
"150": "PROPN.|[acl]",
"151": "PROPN.|[advcl]",
"152": "PROPN.|[compound]",
"153": "PROPN.|[nmod]",
"154": "PROPN.|[nsubj:outer]",
"155": "PROPN.|[nsubj]",
"156": "PROPN.|[obj]",
"157": "PROPN.|[obl]",
"158": "PROPN.|[root]",
"159": "PROPN|[acl]",
"160": "PROPN|[advcl]",
"161": "PROPN|[compound]",
"162": "PROPN|[nmod]",
"163": "PROPN|[nsubj:outer]",
"164": "PROPN|[nsubj]",
"165": "PROPN|[obj]",
"166": "PROPN|[obl]",
"167": "PROPN|[root]",
"168": "PUNCT",
"169": "PUNCT.",
"170": "PUNCT.|[punct]",
"171": "PUNCT|[punct]",
"172": "SCONJ",
"173": "SCONJ.",
"174": "SCONJ.|[dep]",
"175": "SCONJ.|[fixed]",
"176": "SCONJ.|[mark]",
"177": "SCONJ|[dep]",
"178": "SCONJ|[fixed]",
"179": "SCONJ|[mark]",
"180": "SYM",
"181": "SYM.",
"182": "SYM.|[compound]",
"183": "SYM.|[dep]",
"184": "SYM.|[nmod]",
"185": "SYM.|[obl]",
"186": "SYM|[compound]",
"187": "SYM|[dep]",
"188": "SYM|[nmod]",
"189": "SYM|[obl]",
"190": "VERB",
"191": "VERB.",
"192": "VERB.|[acl]",
"193": "VERB.|[advcl]",
"194": "VERB.|[ccomp]",
"195": "VERB.|[compound]",
"196": "VERB.|[csubj:outer]",
"197": "VERB.|[csubj]",
"198": "VERB.|[nmod]",
"199": "VERB.|[obj]",
"200": "VERB.|[obl]",
"201": "VERB.|[root]",
"202": "VERB|[acl]",
"203": "VERB|[advcl]",
"204": "VERB|[ccomp]",
"205": "VERB|[compound]",
"206": "VERB|[csubj:outer]",
"207": "VERB|[csubj]",
"208": "VERB|[nmod]",
"209": "VERB|[obj]",
"210": "VERB|[obl]",
"211": "VERB|[root]",
"212": "X",
"213": "X.",
"214": "X.|[dep]",
"215": "X.|[goeswith]",
"216": "X.|[nmod]",
"217": "X|[dep]",
"218": "X|[nmod]"
},
"initializer_cutoff_factor": 2.0,
"initializer_range": 0.02,
"intermediate_size": 2624,
"label2id": {
"ADJ": 0,
"ADJ.": 1,
"ADJ.|[acl]": 2,
"ADJ.|[advcl]": 3,
"ADJ.|[amod]": 4,
"ADJ.|[ccomp]": 5,
"ADJ.|[csubj:outer]": 6,
"ADJ.|[csubj]": 7,
"ADJ.|[dep]": 8,
"ADJ.|[nmod]": 9,
"ADJ.|[nsubj]": 10,
"ADJ.|[obj]": 11,
"ADJ.|[obl]": 12,
"ADJ.|[root]": 13,
"ADJ|[acl]": 14,
"ADJ|[advcl]": 15,
"ADJ|[amod]": 16,
"ADJ|[ccomp]": 17,
"ADJ|[csubj:outer]": 18,
"ADJ|[csubj]": 19,
"ADJ|[dep]": 20,
"ADJ|[nmod]": 21,
"ADJ|[nsubj]": 22,
"ADJ|[obj]": 23,
"ADJ|[obl]": 24,
"ADJ|[root]": 25,
"ADP": 26,
"ADP.": 27,
"ADP.|[case]": 28,
"ADP.|[fixed]": 29,
"ADP|[case]": 30,
"ADP|[fixed]": 31,
"ADV": 32,
"ADV.": 33,
"ADV.|[advcl]": 34,
"ADV.|[advmod]": 35,
"ADV.|[dep]": 36,
"ADV.|[obj]": 37,
"ADV.|[root]": 38,
"ADV|[advcl]": 39,
"ADV|[advmod]": 40,
"ADV|[dep]": 41,
"ADV|[obj]": 42,
"ADV|[root]": 43,
"AUX": 44,
"AUX.": 45,
"AUX.|Polarity=Neg": 46,
"AUX.|Polarity=Neg|[aux]": 47,
"AUX.|Polarity=Neg|[fixed]": 48,
"AUX.|[aux]": 49,
"AUX.|[cop]": 50,
"AUX.|[fixed]": 51,
"AUX.|[root]": 52,
"AUX|Polarity=Neg": 53,
"AUX|Polarity=Neg|[aux]": 54,
"AUX|Polarity=Neg|[fixed]": 55,
"AUX|[aux]": 56,
"AUX|[cop]": 57,
"AUX|[fixed]": 58,
"AUX|[root]": 59,
"CCONJ": 60,
"CCONJ.": 61,
"CCONJ.|[cc]": 62,
"CCONJ|[cc]": 63,
"DET": 64,
"DET.": 65,
"DET.|[det]": 66,
"DET|[det]": 67,
"INTJ": 68,
"INTJ.": 69,
"INTJ.|[discourse]": 70,
"INTJ.|[root]": 71,
"INTJ|[discourse]": 72,
"INTJ|[root]": 73,
"NOUN": 74,
"NOUN.": 75,
"NOUN.|Polarity=Neg": 76,
"NOUN.|Polarity=Neg|[obl]": 77,
"NOUN.|Polarity=Neg|[root]": 78,
"NOUN.|[acl]": 79,
"NOUN.|[advcl]": 80,
"NOUN.|[ccomp]": 81,
"NOUN.|[compound]": 82,
"NOUN.|[csubj:outer]": 83,
"NOUN.|[csubj]": 84,
"NOUN.|[nmod]": 85,
"NOUN.|[nsubj:outer]": 86,
"NOUN.|[nsubj]": 87,
"NOUN.|[obj]": 88,
"NOUN.|[obl]": 89,
"NOUN.|[root]": 90,
"NOUN|Polarity=Neg": 91,
"NOUN|Polarity=Neg|[obl]": 92,
"NOUN|Polarity=Neg|[root]": 93,
"NOUN|[acl]": 94,
"NOUN|[advcl]": 95,
"NOUN|[ccomp]": 96,
"NOUN|[compound]": 97,
"NOUN|[csubj:outer]": 98,
"NOUN|[csubj]": 99,
"NOUN|[nmod]": 100,
"NOUN|[nsubj:outer]": 101,
"NOUN|[nsubj]": 102,
"NOUN|[obj]": 103,
"NOUN|[obl]": 104,
"NOUN|[root]": 105,
"NUM": 106,
"NUM.": 107,
"NUM.|[advcl]": 108,
"NUM.|[compound]": 109,
"NUM.|[nmod]": 110,
"NUM.|[nsubj:outer]": 111,
"NUM.|[nsubj]": 112,
"NUM.|[nummod]": 113,
"NUM.|[obj]": 114,
"NUM.|[obl]": 115,
"NUM.|[root]": 116,
"NUM|[advcl]": 117,
"NUM|[compound]": 118,
"NUM|[nmod]": 119,
"NUM|[nsubj:outer]": 120,
"NUM|[nsubj]": 121,
"NUM|[nummod]": 122,
"NUM|[obj]": 123,
"NUM|[obl]": 124,
"NUM|[root]": 125,
"PART": 126,
"PART.": 127,
"PART.|[mark]": 128,
"PART|[mark]": 129,
"PRON": 130,
"PRON.": 131,
"PRON.|[acl]": 132,
"PRON.|[advcl]": 133,
"PRON.|[nmod]": 134,
"PRON.|[nsubj:outer]": 135,
"PRON.|[nsubj]": 136,
"PRON.|[obj]": 137,
"PRON.|[obl]": 138,
"PRON.|[root]": 139,
"PRON|[acl]": 140,
"PRON|[advcl]": 141,
"PRON|[nmod]": 142,
"PRON|[nsubj:outer]": 143,
"PRON|[nsubj]": 144,
"PRON|[obj]": 145,
"PRON|[obl]": 146,
"PRON|[root]": 147,
"PROPN": 148,
"PROPN.": 149,
"PROPN.|[acl]": 150,
"PROPN.|[advcl]": 151,
"PROPN.|[compound]": 152,
"PROPN.|[nmod]": 153,
"PROPN.|[nsubj:outer]": 154,
"PROPN.|[nsubj]": 155,
"PROPN.|[obj]": 156,
"PROPN.|[obl]": 157,
"PROPN.|[root]": 158,
"PROPN|[acl]": 159,
"PROPN|[advcl]": 160,
"PROPN|[compound]": 161,
"PROPN|[nmod]": 162,
"PROPN|[nsubj:outer]": 163,
"PROPN|[nsubj]": 164,
"PROPN|[obj]": 165,
"PROPN|[obl]": 166,
"PROPN|[root]": 167,
"PUNCT": 168,
"PUNCT.": 169,
"PUNCT.|[punct]": 170,
"PUNCT|[punct]": 171,
"SCONJ": 172,
"SCONJ.": 173,
"SCONJ.|[dep]": 174,
"SCONJ.|[fixed]": 175,
"SCONJ.|[mark]": 176,
"SCONJ|[dep]": 177,
"SCONJ|[fixed]": 178,
"SCONJ|[mark]": 179,
"SYM": 180,
"SYM.": 181,
"SYM.|[compound]": 182,
"SYM.|[dep]": 183,
"SYM.|[nmod]": 184,
"SYM.|[obl]": 185,
"SYM|[compound]": 186,
"SYM|[dep]": 187,
"SYM|[nmod]": 188,
"SYM|[obl]": 189,
"VERB": 190,
"VERB.": 191,
"VERB.|[acl]": 192,
"VERB.|[advcl]": 193,
"VERB.|[ccomp]": 194,
"VERB.|[compound]": 195,
"VERB.|[csubj:outer]": 196,
"VERB.|[csubj]": 197,
"VERB.|[nmod]": 198,
"VERB.|[obj]": 199,
"VERB.|[obl]": 200,
"VERB.|[root]": 201,
"VERB|[acl]": 202,
"VERB|[advcl]": 203,
"VERB|[ccomp]": 204,
"VERB|[compound]": 205,
"VERB|[csubj:outer]": 206,
"VERB|[csubj]": 207,
"VERB|[nmod]": 208,
"VERB|[obj]": 209,
"VERB|[obl]": 210,
"VERB|[root]": 211,
"X": 212,
"X.": 213,
"X.|[dep]": 214,
"X.|[goeswith]": 215,
"X.|[nmod]": 216,
"X|[dep]": 217,
"X|[nmod]": 218
},
"layer_norm_eps": 1e-05,
"local_attention": 128,
"local_rope_theta": 10000.0,
"max_position_embeddings": 8192,
"mlp_bias": false,
"mlp_dropout": 0.0,
"model_type": "modernbert",
"norm_bias": false,
"norm_eps": 1e-05,
"num_attention_heads": 16,
"num_hidden_layers": 28,
"pad_token_id": 1,
"position_embedding_type": "absolute",
"reference_compile": true,
"sep_token_id": 2,
"sparse_pred_ignore_index": -100,
"sparse_prediction": false,
"tokenizer_class": "DebertaV2TokenizerFast",
"torch_dtype": "float32",
"transformers_version": "4.47.1",
"vocab_size": 65000
}