juanquivilla
committed on
Best model tokenizer with eval_loss: 0.9291 (Trained with Unsloth)

- tokenizer.json +0 -0
- tokenizer_config.json +1 -5
- vocab.json +0 -0
tokenizer.json
ADDED
The diff for this file is too large to render. See raw diff.
tokenizer_config.json
CHANGED
@@ -1,5 +1,4 @@
 {
-  "add_bos_token": false,
   "add_prefix_space": false,
   "added_tokens_decoder": {
     "5809": {
@@ -783,13 +782,10 @@
   "chat_template": "{% for message in messages %}{% if (message['role'] == 'system') %}{{'<|im_start|>system<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'user') %}{{'<|im_start|>user<|im_sep|>' + message['content'] + '<|im_end|>'}}{% elif (message['role'] == 'assistant') %}{{'<|im_start|>assistant<|im_sep|>' + message['content'] + '<|im_end|>'}}{% endif %}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant<|im_sep|>' }}{% endif %}",
   "clean_up_tokenization_spaces": false,
   "eos_token": "<|im_end|>",
-  "errors": "replace",
   "extra_special_tokens": {},
-  "from_slow": true,
-  "legacy": false,
   "model_max_length": 16384,
   "pad_token": "<|dummy_87|>",
-  "padding_side": "
+  "padding_side": "right",
   "tokenizer_class": "GPT2Tokenizer",
   "unk_token": "�"
 }
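The net effect of the config change is that the tokenizer now pads on the right with the configured pad token, and the chat template wraps each message in <|im_start|>{role}<|im_sep|>…<|im_end|> markers. Below is a minimal sketch (not part of this commit) of how the updated tokenizer_config.json is consumed with transformers; the repo id is a placeholder for this repository.

# Sketch only: shows the behavior implied by the updated tokenizer_config.json.
# The repo id "user/model-repo" is a placeholder, not taken from this commit.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("user/model-repo")  # placeholder repo id

# padding_side is now "right": batched inputs are padded on the right
# with the configured pad token, <|dummy_87|>.
batch = tokenizer(["short prompt", "a somewhat longer prompt"], padding=True)

# The chat_template wraps each message as <|im_start|>{role}<|im_sep|>...<|im_end|>;
# with add_generation_prompt=True it appends the assistant header at the end.
messages = [
    {"role": "system", "content": "You are a helpful assistant."},
    {"role": "user", "content": "Hello!"},
]
prompt = tokenizer.apply_chat_template(
    messages, tokenize=False, add_generation_prompt=True
)
print(prompt)
# Output (a single string with no newlines, wrapped here for readability):
# <|im_start|>system<|im_sep|>You are a helpful assistant.<|im_end|>
# <|im_start|>user<|im_sep|>Hello!<|im_end|><|im_start|>assistant<|im_sep|>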
vocab.json
CHANGED
The diff for this file is too large to render. See raw diff.