|
{ |
|
"version": "1.0", |
|
"truncation": null, |
|
"padding": null, |
|
"added_tokens": [ |
|
{ |
|
"id": 0, |
|
"content": "<unk>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
}, |
|
{ |
|
"id": 53, |
|
"content": "<s>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
}, |
|
{ |
|
"id": 54, |
|
"content": "</s>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
}, |
|
{ |
|
"id": 55, |
|
"content": "<pad>", |
|
"single_word": false, |
|
"lstrip": false, |
|
"rstrip": false, |
|
"normalized": true, |
|
"special": true |
|
}, |
|
{ |
|
"id": 56, |
|
"content": "<mask>", |
|
"single_word": false, |
|
"lstrip": true, |
|
"rstrip": false, |
|
"normalized": false, |
|
"special": true |
|
} |
|
], |
|
"normalizer": null, |
|
"pre_tokenizer": { |
|
"type": "ByteLevel", |
|
"add_prefix_space": false, |
|
"trim_offsets": true, |
|
"use_regex": true |
|
}, |
|
"post_processor": { |
|
"type": "RobertaProcessing", |
|
"sep": [ |
|
"</s>", |
|
54 |
|
], |
|
"cls": [ |
|
"<s>", |
|
53 |
|
], |
|
"trim_offsets": true, |
|
"add_prefix_space": false |
|
}, |
|
"decoder": { |
|
"type": "ByteLevel", |
|
"add_prefix_space": true, |
|
"trim_offsets": true, |
|
"use_regex": true |
|
}, |
|
"model": { |
|
"type": "BPE", |
|
"dropout": null, |
|
"unk_token": null, |
|
"continuing_subword_prefix": "", |
|
"end_of_word_suffix": "", |
|
"fuse_unk": false, |
|
"byte_fallback": false, |
|
"vocab": { |
|
"<unk>": 0, |
|
"a": 1, |
|
"b": 2, |
|
"c": 3, |
|
"d": 4, |
|
"e": 5, |
|
"f": 6, |
|
"g": 7, |
|
"h": 8, |
|
"i": 9, |
|
"j": 10, |
|
"k": 11, |
|
"l": 12, |
|
"m": 13, |
|
"n": 14, |
|
"o": 15, |
|
"p": 16, |
|
"q": 17, |
|
"r": 18, |
|
"s": 19, |
|
"t": 20, |
|
"u": 21, |
|
"v": 22, |
|
"w": 23, |
|
"x": 24, |
|
"y": 25, |
|
"z": 26, |
|
"x</w>": 27, |
|
"v</w>": 28, |
|
"d</w>": 29, |
|
"r</w>": 30, |
|
"s</w>": 31, |
|
"w</w>": 32, |
|
"c</w>": 33, |
|
"k</w>": 34, |
|
"n</w>": 35, |
|
"y</w>": 36, |
|
"p</w>": 37, |
|
"j</w>": 38, |
|
"g</w>": 39, |
|
"f</w>": 40, |
|
"t</w>": 41, |
|
"z</w>": 42, |
|
"a</w>": 43, |
|
"e</w>": 44, |
|
"o</w>": 45, |
|
"q</w>": 46, |
|
"i</w>": 47, |
|
"u</w>": 48, |
|
"m</w>": 49, |
|
"h</w>": 50, |
|
"l</w>": 51, |
|
"b</w>": 52 |
|
}, |
|
"merges": [] |
|
} |
|
} |