{
  "version": "1.0",
  "truncation": null,
  "padding": null,
  "added_tokens": [
    {
      "id": 24,
      "content": "[UNK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 25,
      "content": "[PAD]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 26,
      "content": "[MASK]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 27,
      "content": "[SEP]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    },
    {
      "id": 28,
      "content": "[CLS]",
      "single_word": false,
      "lstrip": false,
      "rstrip": false,
      "normalized": false,
      "special": true
    }
  ],
  "normalizer": {
    "type": "Sequence",
    "normalizers": []
  },
  "pre_tokenizer": {
    "type": "WhitespaceSplit"
  },
  "post_processor": null,
  "decoder": {
    "type": "WordPiece",
    "prefix": "##",
    "cleanup": true
  },
  "model": {
    "type": "WordLevel",
    "vocab": {
      "[unused0]": 0,
      "[unused1]": 1,
      "[unused2]": 2,
      "[unused3]": 3,
      "[unused4]": 4,
      "[unused5]": 5,
      "IDX_0": 6,
      "IDX_1": 7,
      "IDX_2": 8,
      "IDX_3": 9,
      "IDX_4": 10,
      "IDX_5": 11,
      "IDX_6": 12,
      "IDX_7": 13,
      "IDX_8": 14,
      "IDX_9": 15,
      "ATOM_C": 16,
      "ATOM_N": 17,
      "ATOM_O": 18,
      "ATOM_F": 19,
      "BOND_SINGLE": 20,
      "BOND_DOUBLE": 21,
      "BOND_TRIPLE": 22,
      "BOND_AROMATIC": 23,
      "[UNK]": 24,
      "[PAD]": 25,
      "[MASK]": 26,
      "[SEP]": 27,
      "[CLS]": 28
    },
    "unk_token": "[UNK]"
  }
}