soldni committed on
Commit
86d32e8
1 Parent(s): b57ba18
Files changed (1) hide show
  1. tokenizer.json +8 -0
tokenizer.json CHANGED
@@ -206,6 +206,14 @@
206
  "pre_tokenizer": {
207
  "type": "Sequence",
208
  "pretokenizers": [
 
 
 
 
 
 
 
 
209
  {
210
  "type": "Split",
211
  "pattern": {
 
206
  "pre_tokenizer": {
207
  "type": "Sequence",
208
  "pretokenizers": [
209
+ {
210
+ "type": "Split",
211
+ "pattern": {
212
+ "Regex": "(?=(\\d{3})+(?!\\d))"
213
+ },
214
+ "behavior": "Isolated",
215
+ "invert": false
216
+ },
217
  {
218
  "type": "Split",
219
  "pattern": {