diff --git "a/tokenizer_config.json" "b/tokenizer_config.json"
--- "a/tokenizer_config.json"
+++ "b/tokenizer_config.json"
@@ -19,32037 +19,25 @@
"special": true
},
"2": {
- "content": "<|im_end|>",
+ "content": "",
"lstrip": false,
"normalized": false,
"rstrip": false,
"single_word": false,
"special": true
- },
- "32000": {
- "content": "<|im_start|>",
- "lstrip": false,
- "normalized": false,
- "rstrip": false,
- "single_word": false,
- "special": false
}
},
"additional_special_tokens": [],
"bos_token": "",
"clean_up_tokenization_spaces": false,
- "eos_token": "<|im_end|>",
+ "eos_token": "",
"legacy": true,
- "model_max_length": 1000000000000000019884624838656,
- "pad_token": "<|im_end|>",
+ "model_max_length": 32768,
+ "pad_token": "",
+ "padding_side": "right",
"sp_model_kwargs": {},
"spaces_between_special_tokens": false,
"tokenizer_class": "LlamaTokenizer",
- "trust_remote_code": false,
"unk_token": "",
- "use_default_system_prompt": false,
- "use_fast": true,
- "vocab": {
- "\u0001": 29534,
- "\u0002": 30551,
- "\u0003": 30662,
- "\u0004": 30724,
- "\u0005": 30550,
- "\u0006": 30314,
- "\u0007": 30963,
- "\b": 31129,
- "\u000b": 30638,
- "\f": 29683,
- "\r": 28801,
- "\r\r": 21259,
- "\u000e": 30517,
- "\u000f": 30698,
- "\u0010": 30388,
- "\u0011": 30557,
- "\u0012": 30298,
- "\u0013": 30453,
- "\u0014": 30721,
- "\u0015": 30675,
- "\u0016": 30935,
- "\u0017": 30841,
- "\u0018": 30555,
- "\u0019": 30969,
- "\u001a": 30759,
- "\u001b": 30246,
- "\u001c": 31134,
- "\u001d": 31236,
- "\u001e": 31150,
- "\u001f": 31217,
- "!": 28808,
- "!!": 3946,
- "!!!": 13915,
- "!!!!": 19010,
- "!\"": 2781,
- "!\");": 22896,
- "!'": 8873,
- "!(": 8213,
- "!(\"": 19228,
- "!)": 14280,
- "!*": 24240,
- "!--": 8567,
- "!=": 14379,
- "![": 25531,
- "!\\": 8263,
- "!\\!\\": 18378,
- "!_": 19193,
- "!”": 14569,
- "\"": 28739,
- "\"\r": 11525,
- "\"\"": 2539,
- "\"\"\"": 18552,
- "\"%": 28413,
- "\")": 1243,
- "\")\r": 16949,
- "\"))": 4948,
- "\")));": 28290,
- "\"));": 5341,
- "\"),": 3548,
- "\").": 4145,
- "\"):": 21021,
- "\");": 1041,
- "\");\r": 6913,
- "\")]": 6564,
- "\"+": 13578,
- "\",": 548,
- "\",\r": 10939,
- "\",\"": 5988,
- "\".": 2586,
- "\"/>": 2720,
- "\":": 1264,
- "\":\"": 10549,
- "\";": 1599,
- "\";\r": 11880,
- "\">": 1355,
- "\">\r": 14980,
- "\">&": 25260,
- "\"><": 10123,
- "\">": 5828,
- "\">": 26109,
- "\"?": 27257,
- "\"\\": 16646,
- "\"]": 2242,
- "\"])": 18073,
- "\"],": 8883,
- "\"].": 15254,
- "\"];": 20652,
- "\"][\"": 24635,
- "\"`": 8312,
- "\"}": 17395,
- "\"},": 7706,
- "\"—": 17216,
- "#": 28771,
- "##": 1064,
- "###": 27332,
- "####": 2000,
- "########": 3590,
- "################": 6668,
- "#[": 8801,
- "#{": 16775,
- "$": 28776,
- "$$": 10811,
- "$(": 13299,
- "$(\"#": 26690,
- "$('#": 24546,
- "$)": 11996,
- "$),": 27517,
- "$).": 23073,
- "$,": 1777,
- "$-": 4616,
- "$.": 1715,
- "$:": 27561,
- "$;": 24912,
- "$\\": 4659,
- "$^{-": 23596,
- "$_": 11914,
- "$_{": 28037,
- "${": 3996,
- "$}": 15489,
- "$~": 26051,
- "%": 28823,
- "%%": 6069,
- "%%%%": 11032,
- "%%%%%%%%": 18849,
- "%)": 11526,
- "%,": 16036,
- "%.": 13210,
- "%;": 26403,
- "&": 28800,
- "": 27440,
- "&&": 6630,
- "&=": 18217,
- "&\\": 12331,
- "'": 28742,
- "'\r": 27732,
- "'\"": 21236,
- "'\",": 27354,
- "'$": 15023,
- "''": 5437,
- "'''": 25135,
- "'(": 22382,
- "')": 1481,
- "')\r": 22186,
- "'))": 8100,
- "')).": 27662,
- "'));": 12941,
- "'),": 4829,
- "')->": 18364,
- "').": 4532,
- "'):": 18591,
- "');": 2207,
- "');\r": 16943,
- "'*": 21244,
- "'+": 18660,
- "',": 647,
- "',\r": 10278,
- "','": 6078,
- "'.": 4135,
- "'.$": 19613,
- "':": 1869,
- "';": 1775,
- "';\r": 18759,
- "'=>": 14186,
- "'=>'": 24262,
- "'>": 13874,
- "'\\": 9620,
- "']": 1421,
- "'])": 8757,
- "']))": 16433,
- "']);": 13932,
- "'],": 5807,
- "'].": 14303,
- "'];": 9153,
- "'][": 24948,
- "']['": 8780,
- "'_": 16314,
- "'}": 14491,
- "'},": 16041,
- "(": 28732,
- "(\r": 17334,
- "(!": 6469,
- "(\"": 618,
- "(\"\");": 28361,
- "(\"#": 11082,
- "(\"%": 6996,
- "(\",": 23431,
- "(\"-": 24751,
- "(\".": 15560,
- "(\"/": 9933,
- "(\"<": 21784,
- "(\"[": 16848,
- "(\"\\": 10592,
- "($": 1501,
- "($_": 14076,
- "(%": 16256,
- "(&": 1735,
- "('": 857,
- "('#": 10457,
- "('.": 10182,
- "('./": 21093,
- "('/": 10815,
- "('<": 23880,
- "('\\": 21421,
- "((": 1880,
- "(((": 11133,
- "(()": 10289,
- "()": 470,
- "()\r": 9962,
- "())": 1904,
- "()))": 9859,
- "()));": 8797,
- "()),": 11168,
- "()).": 9079,
- "());": 1657,
- "());\r": 15056,
- "(),": 1648,
- "()->": 4330,
- "().": 1546,
- "():": 5888,
- "();": 692,
- "();\r": 4420,
- "()[": 19482,
- "()]": 21183,
- "(){": 8881,
- "()}": 25800,
- "(*": 4756,
- "(-": 6422,
- "(/": 16552,
- "(:": 7306,
- "(?:": 26733,
- "(@": 10680,
- "([": 5187,
- "(['": 18706,
- "([]": 15993,
- "(\\": 1365,
- "(_": 4831,
- "(__": 7697,
- "(`": 11045,
- "({": 2882,
- "({\\": 11592,
- "(|": 18214,
- ")": 28731,
- ")\r": 3174,
- ")\"": 12159,
- ")\");": 18490,
- ")\",": 10647,
- ")$": 2322,
- ")$,": 7927,
- ")$-": 25878,
- ")$.": 7239,
- ")&": 22743,
- ")'": 24996,
- ")',": 20735,
- ")(": 3847,
- ")((": 26053,
- "))": 743,
- "))\r": 14668,
- ")))": 5429,
- "))))": 22318,
- ")));": 7574,
- ")),": 7218,
- ")).": 8698,
- ")):": 24770,
- "));": 1090,
- "));\r": 10941,
- ")){": 20474,
- ")*": 4869,
- ")**": 10095,
- ")+": 10528,
- ")+\\": 25886,
- "),": 557,
- "),\r": 23486,
- "),\\": 19908,
- ")-": 9572,
- ")--": 20560,
- ")--(": 24135,
- ")->": 6952,
- ")-\\": 26338,
- ").": 609,
- ")/": 10210,
- "):": 1329,
- "):\r": 20358,
- ");": 344,
- ");\r": 1761,
- ")": 10900,
- ")=": 6706,
- ")=\\": 11148,
- ")>": 27364,
- ")?": 11840,
- ")?;": 24909,
- ")[": 10908,
- ")\\": 2681,
- ")\\,": 23115,
- ")\\\\": 23527,
- ")]": 4753,
- ")^": 7981,
- ")^{": 10825,
- ")^{-": 22816,
- ")^{\\": 23022,
- ")_": 14080,
- ")_{": 20431,
- "){": 3311,
- "){\r": 18055,
- "){\\": 11544,
- ")|": 12362,
- ")}": 3460,
- ")}$": 16016,
- ")}(": 20089,
- ")},": 14810,
- ")}\\": 11688,
- ")}_{": 26585,
- ")}{": 14442,
- ")}{\\": 23045,
- ")}}": 22505,
- "*": 28736,
- "*\r": 13667,
- "*\"": 27045,
- "*(": 13411,
- "*)": 2523,
- "*),": 9151,
- "*).": 12619,
- "**": 348,
- "**)": 12885,
- "***": 16623,
- "****": 565,
- "******": 9329,
- "********": 812,
- "************": 17328,
- "**************": 20798,
- "****************": 1393,
- "******/": 15212,
- "**,": 4789,
- "**.": 11740,
- "**/": 26221,
- "**:": 9189,
- "*,": 1561,
- "*-": 13725,
- "*.": 2414,
- "*/": 1075,
- "*/\r": 9823,
- "*:": 16550,
- "*;": 13732,
- "*>": 27924,
- "*>(": 19011,
- "*\\": 19008,
- "*{": 21599,
- "*}": 4237,
- "+": 28806,
- "+\"": 12762,
- "+'": 23922,
- "+(": 24993,
- "++": 1680,
- "++)": 3073,
- "++){": 23385,
- "++++": 17501,
- "++;": 5732,
- "++]": 21371,
- "+-": 22805,
- "+=": 21234,
- "+\\": 3767,
- "+\\_\\": 25471,
- ",": 28725,
- ",\r": 4604,
- ",\"": 862,
- ",$": 13948,
- ",&": 17610,
- ",'": 3029,
- ",(": 20786,
- ",*": 17205,
- ",,": 8787,
- ",,,,": 20863,
- ",-": 7667,
- ",.": 27608,
- ",\\": 2142,
- ",\\,": 21946,
- ",\\,\\": 28690,
- ",_": 5796,
- ",{\\": 26225,
- ",”": 3372,
- "-": 28733,
- "-$": 15191,
- "-%": 23962,
- "-(": 24894,
- "-)": 27346,
- "-,": 24244,
- "--": 376,
- "--)": 26107,
- "---": 7395,
- "----": 502,
- "-----": 13918,
- "------": 18697,
- "-------": 25702,
- "--------": 709,
- "---------": 25635,
- "----------": 16417,
- "-----------": 26684,
- "------------": 9762,
- "-------------": 22843,
- "--------------": 23633,
- "----------------": 1177,
- "--;": 16850,
- "-->": 26345,
- "-.": 15376,
- "->": 471,
- "->_": 9631,
- "-\\": 3137,
- ".": 28723,
- ".\r": 5488,
- ".\"": 611,
- ".\"\"\"": 16498,
- ".\")": 17169,
- ".\");": 10927,
- ".\",": 9191,
- ".\";": 22319,
- ".\"]": 28659,
- ".\"_": 26408,
- ".$": 5910,
- ".'": 1815,
- ".'\"": 26773,
- ".');": 27148,
- ".',": 13007,
- ".(": 21057,
- ".)": 2974,
- ".),": 14439,
- ".).": 15745,
- ".*": 3752,
- ".**": 9080,
- ".*;": 17362,
- ".,": 2063,
- ".-": 14902,
- "..": 568,
- "...": 1101,
- "...\"": 7508,
- "...'": 22984,
- "...)": 16381,
- "...,": 25272,
- "....": 3406,
- ".....": 24694,
- "........": 10522,
- "................": 22207,
- "...": 23032,
- "../": 1979,
- "../../": 4290,
- "./": 3849,
- ".:": 12813,
- ".;": 19362,
- ".<": 26364,
- ".": 4698,
- ".[": 20011,
- ".\\": 5923,
- ".]": 20148,
- ".^{[": 22803,
- "._": 1573,
- ".__": 9320,
- ".}": 8089,
- ".~": 10749,
- ".~(\\": 21622,
- ".~\\": 8080,
- ".’": 18526,
- ".“": 24033,
- ".”": 2435,
- "/": 28748,
- "/\"": 17713,
- "/#": 27581,
- "/$": 23530,
- "/${": 23820,
- "/%": 22237,
- "/'": 25415,
- "/(": 20974,
- "/)": 19840,
- "/*": 1477,
- "/*!": 19994,
- "/**": 1922,
- "/**\r": 14756,
- "/,": 25574,
- "/.": 15704,
- "//": 421,
- "///": 3312,
- "////": 1603,
- "////////": 2810,
- "////////////////": 5172,
- "/:": 26387,
- "/>": 2370,
- "/\\": 8673,
- "/_": 23762,
- "/{": 11912,
- "0": 28734,
- "1": 28740,
- "2": 28750,
- "3": 28770,
- "4": 28781,
- "5": 28782,
- "6": 28784,
- "7": 28787,
- "8": 28783,
- "9": 28774,
- ":": 28747,
- ":\r": 8230,
- ":\"": 4825,
- ":\",": 26906,
- ":%": 8210,
- ":%.*": 16772,
- ":%.*]]": 27456,
- ":'": 14243,
- ":*": 15563,
- ":**": 4049,
- ":-": 18684,
- "://": 1508,
- "::": 564,
- "::$": 24498,
- "::<": 23015,
- "::_": 21312,
- "::{": 21353,
- "::~": 27893,
- ":": 11423,
- ":\\": 9941,
- ":_": 19746,
- ":`": 21502,
- ":{": 26856,
- ":}": 26657,
- ";": 28745,
- ";\r": 1271,
- ";\"": 24626,
- ";&": 11043,
- ";/": 16663,
- ";;": 19406,
- ";": 16211,
- ";\\": 8511,
- ";}": 26070,
- "<": 28789,
- "": 6998,
- "▁-=": 10546,
- "▁->": 3193,
- "▁-\\": 12300,
- "▁.": 842,
- "▁.\"": 15192,
- "▁..": 8072,
- "▁...": 3850,
- "▁./": 17449,
- "▁.=": 11930,
- "▁/": 732,
- "▁/*": 1385,
- "▁/*!": 17847,
- "▁/**": 2067,
- "▁/**\r": 20418,
- "▁/******/": 19250,
- "▁/***/": 15320,
- "▁//": 589,
- "▁//\r": 26083,
- "▁//!": 19497,
- "▁///": 1954,
- "▁/>": 4884,
- "▁:": 714,
- "▁:)": 15108,
- "▁::": 6210,
- "▁:=": 2137,
- "▁;": 2753,
- "▁<": 523,
- "▁