{ | |
"version": "1.0", | |
"truncation": { | |
"direction": "Right", | |
"max_length": 512, | |
"strategy": "LongestFirst", | |
"stride": 0 | |
}, | |
"padding": { | |
"strategy": "BatchLongest", | |
"direction": "Right", | |
"pad_to_multiple_of": null, | |
"pad_id": 1, | |
"pad_type_id": 0, | |
"pad_token": "[PAD]" | |
}, | |
"added_tokens": [ | |
{ | |
"id": 0, | |
"content": "[CLS]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 1, | |
"content": "[PAD]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 2, | |
"content": "[SEP]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 3, | |
"content": "[UNK]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
}, | |
{ | |
"id": 301, | |
"content": "[MASK]", | |
"single_word": false, | |
"lstrip": false, | |
"rstrip": false, | |
"normalized": false, | |
"special": true | |
} | |
], | |
"normalizer": null, | |
"pre_tokenizer": { | |
"type": "Split", | |
"pattern": { | |
"String": "" | |
}, | |
"behavior": "Isolated", | |
"invert": false | |
}, | |
"post_processor": { | |
"type": "TemplateProcessing", | |
"single": [ | |
{ | |
"SpecialToken": { | |
"id": "[CLS]", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"Sequence": { | |
"id": "A", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"SpecialToken": { | |
"id": "[SEP]", | |
"type_id": 0 | |
} | |
} | |
], | |
"pair": [ | |
{ | |
"SpecialToken": { | |
"id": "[CLS]", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"Sequence": { | |
"id": "A", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"SpecialToken": { | |
"id": "[SEP]", | |
"type_id": 0 | |
} | |
}, | |
{ | |
"Sequence": { | |
"id": "B", | |
"type_id": 1 | |
} | |
}, | |
{ | |
"SpecialToken": { | |
"id": "[SEP]", | |
"type_id": 1 | |
} | |
} | |
], | |
"special_tokens": { | |
"[CLS]": { | |
"id": "[CLS]", | |
"ids": [ | |
0 | |
], | |
"tokens": [ | |
"[CLS]" | |
] | |
}, | |
"[SEP]": { | |
"id": "[SEP]", | |
"ids": [ | |
2 | |
], | |
"tokens": [ | |
"[SEP]" | |
] | |
} | |
} | |
}, | |
"decoder": null, | |
"model": { | |
"type": "WordLevel", | |
"vocab": { | |
"[CLS]": 0, | |
"[PAD]": 1, | |
"[SEP]": 2, | |
"[UNK]": 3, | |
"unused0": 4, | |
"unused1": 5, | |
"unused2": 6, | |
"unused3": 7, | |
"unused4": 8, | |
"unused5": 9, | |
"unused6": 10, | |
"unused7": 11, | |
"unused8": 12, | |
"unused9": 13, | |
"unused10": 14, | |
"unused11": 15, | |
"unused12": 16, | |
"unused13": 17, | |
"unused14": 18, | |
"unused15": 19, | |
"unused16": 20, | |
"unused17": 21, | |
"unused18": 22, | |
"unused19": 23, | |
"unused20": 24, | |
"unused21": 25, | |
"unused22": 26, | |
"unused23": 27, | |
"unused24": 28, | |
"unused25": 29, | |
"unused26": 30, | |
"unused27": 31, | |
"unused28": 32, | |
"unused29": 33, | |
"unused30": 34, | |
"unused31": 35, | |
"unused32": 36, | |
"unused33": 37, | |
"unused34": 38, | |
"unused35": 39, | |
"unused36": 40, | |
"unused37": 41, | |
"unused38": 42, | |
"unused39": 43, | |
"unused40": 44, | |
"unused41": 45, | |
"unused42": 46, | |
"unused43": 47, | |
"unused44": 48, | |
"unused45": 49, | |
"unused46": 50, | |
"unused47": 51, | |
"unused48": 52, | |
"unused49": 53, | |
"unused50": 54, | |
"unused51": 55, | |
"unused52": 56, | |
"unused53": 57, | |
"unused54": 58, | |
"unused55": 59, | |
"unused56": 60, | |
"unused57": 61, | |
"unused58": 62, | |
"unused59": 63, | |
"unused60": 64, | |
"unused61": 65, | |
"unused62": 66, | |
"unused63": 67, | |
"unused64": 68, | |
"unused65": 69, | |
"unused66": 70, | |
"unused67": 71, | |
"unused68": 72, | |
"unused69": 73, | |
"unused70": 74, | |
"unused71": 75, | |
"unused72": 76, | |
"unused73": 77, | |
"unused74": 78, | |
"unused75": 79, | |
"unused76": 80, | |
"unused77": 81, | |
"unused78": 82, | |
"unused79": 83, | |
"unused80": 84, | |
"unused81": 85, | |
"unused82": 86, | |
"unused83": 87, | |
"unused84": 88, | |
"unused85": 89, | |
"unused86": 90, | |
"unused87": 91, | |
"unused88": 92, | |
"unused89": 93, | |
"unused90": 94, | |
"unused91": 95, | |
"unused92": 96, | |
"unused93": 97, | |
"unused94": 98, | |
"unused95": 99, | |
"unused96": 100, | |
"unused97": 101, | |
"unused98": 102, | |
"unused99": 103, | |
" ": 104, | |
"!": 105, | |
"\"": 106, | |
"#": 107, | |
"$": 108, | |
"%": 109, | |
"&": 110, | |
"'": 111, | |
"(": 112, | |
")": 113, | |
"*": 114, | |
"+": 115, | |
",": 116, | |
"-": 117, | |
".": 118, | |
"/": 119, | |
"0": 120, | |
"1": 121, | |
"2": 122, | |
"3": 123, | |
"4": 124, | |
"5": 125, | |
"6": 126, | |
"7": 127, | |
"8": 128, | |
"9": 129, | |
":": 130, | |
";": 131, | |
"<": 132, | |
"=": 133, | |
">": 134, | |
"?": 135, | |
"@": 136, | |
"A": 137, | |
"B": 138, | |
"C": 139, | |
"D": 140, | |
"E": 141, | |
"F": 142, | |
"G": 143, | |
"H": 144, | |
"I": 145, | |
"J": 146, | |
"K": 147, | |
"L": 148, | |
"M": 149, | |
"N": 150, | |
"O": 151, | |
"P": 152, | |
"Q": 153, | |
"R": 154, | |
"S": 155, | |
"T": 156, | |
"U": 157, | |
"V": 158, | |
"W": 159, | |
"X": 160, | |
"Y": 161, | |
"Z": 162, | |
"[": 163, | |
"\\": 164, | |
"]": 165, | |
"^": 166, | |
"_": 167, | |
"a": 168, | |
"b": 169, | |
"c": 170, | |
"d": 171, | |
"e": 172, | |
"f": 173, | |
"g": 174, | |
"h": 175, | |
"i": 176, | |
"j": 177, | |
"k": 178, | |
"l": 179, | |
"m": 180, | |
"n": 181, | |
"o": 182, | |
"p": 183, | |
"q": 184, | |
"r": 185, | |
"s": 186, | |
"t": 187, | |
"u": 188, | |
"v": 189, | |
"w": 190, | |
"x": 191, | |
"y": 192, | |
"z": 193, | |
"{": 194, | |
"|": 195, | |
"}": 196, | |
"~": 197, | |
"«": 198, | |
"°": 199, | |
"·": 200, | |
"»": 201, | |
"é": 202, | |
"а": 203, | |
"в": 204, | |
"д": 205, | |
"е": 206, | |
"и": 207, | |
"к": 208, | |
"л": 209, | |
"м": 210, | |
"н": 211, | |
"о": 212, | |
"п": 213, | |
"р": 214, | |
"с": 215, | |
"т": 216, | |
"،": 217, | |
"؛": 218, | |
"؟": 219, | |
"ء": 220, | |
"آ": 221, | |
"أ": 222, | |
"ؤ": 223, | |
"إ": 224, | |
"ئ": 225, | |
"ا": 226, | |
"ب": 227, | |
"ة": 228, | |
"ت": 229, | |
"ث": 230, | |
"ج": 231, | |
"ح": 232, | |
"خ": 233, | |
"د": 234, | |
"ذ": 235, | |
"ر": 236, | |
"ز": 237, | |
"س": 238, | |
"ش": 239, | |
"ص": 240, | |
"ض": 241, | |
"ط": 242, | |
"ظ": 243, | |
"ع": 244, | |
"غ": 245, | |
"ـ": 246, | |
"ف": 247, | |
"ق": 248, | |
"ك": 249, | |
"ل": 250, | |
"م": 251, | |
"ن": 252, | |
"ه": 253, | |
"و": 254, | |
"ى": 255, | |
"ي": 256, | |
"ً": 257, | |
"ٌ": 258, | |
"ٍ": 259, | |
"َ": 260, | |
"ُ": 261, | |
"ِ": 262, | |
"ّ": 263, | |
"ْ": 264, | |
"٠": 265, | |
"١": 266, | |
"٢": 267, | |
"٣": 268, | |
"٤": 269, | |
"٥": 270, | |
"٦": 271, | |
"٧": 272, | |
"٨": 273, | |
"٩": 274, | |
"٪": 275, | |
"پ": 276, | |
"ک": 277, | |
"گ": 278, | |
"ھ": 279, | |
"ی": 280, | |
"": 281, | |
"": 282, | |
"": 283, | |
"": 284, | |
"–": 285, | |
"—": 286, | |
"‘": 287, | |
"’": 288, | |
"“": 289, | |
"”": 290, | |
"•": 291, | |
"…": 292, | |
"": 293, | |
"": 294, | |
"": 295, | |
"": 296, | |
"": 297, | |
"﴾": 298, | |
"﴿": 299, | |
"�": 300, | |
"[MASK]": 301 | |
}, | |
"unk_token": "[UNK]" | |
} | |
} |