bibidentuhanoi committed on
Commit
3123779
·
verified ·
1 Parent(s): 559e353

Upload tokenizer

Browse files
added_tokens.json CHANGED
@@ -1,3 +1,4 @@
1
  {
2
- "<|im_start|>": 32000
 
3
  }
 
1
  {
2
+ "<|im_end|>": 32000,
3
+ "<|im_start|>": 32001
4
  }
special_tokens_map.json CHANGED
@@ -14,7 +14,7 @@
14
  "single_word": false
15
  },
16
  "pad_token": {
17
- "content": "<|im_end|>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
 
14
  "single_word": false
15
  },
16
  "pad_token": {
17
+ "content": "<unk>",
18
  "lstrip": false,
19
  "normalized": false,
20
  "rstrip": false,
tokenizer.json CHANGED
@@ -23,7 +23,7 @@
23
  },
24
  {
25
  "id": 2,
26
- "content": "<|im_end|>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
@@ -32,12 +32,21 @@
32
  },
33
  {
34
  "id": 32000,
 
 
 
 
 
 
 
 
 
35
  "content": "<|im_start|>",
36
  "single_word": false,
37
  "lstrip": false,
38
  "rstrip": false,
39
  "normalized": false,
40
- "special": false
41
  }
42
  ],
43
  "normalizer": {
@@ -147,7 +156,7 @@
147
  "vocab": {
148
  "<unk>": 0,
149
  "<s>": 1,
150
- "<|im_end|>": 2,
151
  "<0x00>": 3,
152
  "<0x01>": 4,
153
  "<0x02>": 5,
 
23
  },
24
  {
25
  "id": 2,
26
+ "content": "</s>",
27
  "single_word": false,
28
  "lstrip": false,
29
  "rstrip": false,
 
32
  },
33
  {
34
  "id": 32000,
35
+ "content": "<|im_end|>",
36
+ "single_word": false,
37
+ "lstrip": false,
38
+ "rstrip": false,
39
+ "normalized": false,
40
+ "special": true
41
+ },
42
+ {
43
+ "id": 32001,
44
  "content": "<|im_start|>",
45
  "single_word": false,
46
  "lstrip": false,
47
  "rstrip": false,
48
  "normalized": false,
49
+ "special": true
50
  }
51
  ],
52
  "normalizer": {
 
156
  "vocab": {
157
  "<unk>": 0,
158
  "<s>": 1,
159
+ "</s>": 2,
160
  "<0x00>": 3,
161
  "<0x01>": 4,
162
  "<0x02>": 5,
tokenizer_config.json CHANGED
The diff for this file is too large to render. See raw diff