sigdig
Browse files- tokenizer.json +8 -0
tokenizer.json
CHANGED
@@ -206,6 +206,14 @@
|
|
206 |
"pre_tokenizer": {
|
207 |
"type": "Sequence",
|
208 |
"pretokenizers": [
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
209 |
{
|
210 |
"type": "Split",
|
211 |
"pattern": {
|
|
|
206 |
"pre_tokenizer": {
|
207 |
"type": "Sequence",
|
208 |
"pretokenizers": [
|
209 |
+
{
|
210 |
+
"type": "Split",
|
211 |
+
"pattern": {
|
212 |
+
"Regex": "(?=(\\d{3})+(?!\\d))"
|
213 |
+
},
|
214 |
+
"behavior": "Isolated",
|
215 |
+
"invert": false
|
216 |
+
},
|
217 |
{
|
218 |
"type": "Split",
|
219 |
"pattern": {
|