FuturisticVibes committed on
Commit
762494c
·
verified ·
1 Parent(s): a31b93d

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,72 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: apache-2.0
3
+ base_model: mistral-community/Mixtral-8x22B-v0.1
4
+ tags:
5
+ - generated_from_trainer
6
+ - axolotl
7
+ model-index:
8
+ - name: out
9
+ results: []
10
+ datasets:
11
+ - cognitivecomputations/Dolphin-2.9.2
12
+ - cognitivecomputations/SystemChat-2.0
13
+ - teknium/OpenHermes-2.5
14
+ - m-a-p/CodeFeedback-Filtered-Instruction
15
+ - cognitivecomputations/dolphin-coder
16
+ - cognitivecomputations/samantha-data
17
+ - HuggingFaceH4/ultrachat_200k
18
+ - microsoft/orca-math-word-problems-200k
19
+ - abacusai/SystemChat-1.1
20
+ - Locutusque/function-calling-chatml
21
+ - internlm/Agent-FLAN
22
+ language:
23
+ - en
24
+ ---
25
+
26
+ # Dolphin 2.9.2 Mixtral 8x22b 🐬
27
+
28
+ Curated and trained by Eric Hartford, Lucas Atkins, Fernando Fernandes, and Cognitive Computations
29
+
30
+ [![Discord](https://img.shields.io/discord/1156064224225808488?logo=Discord&logoColor=%23ffffff&label=Discord&link=https%3A%2F%2Fdiscord.gg%2FtCMkMDDHwm)](https://discord.gg/cognitivecomputations)
31
+ Discord: https://discord.gg/cognitivecomputations
32
+
33
+ <img src="https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/ldkN1J0WIDQwU4vutGYiD.png" width="600" />
34
+
35
+ New in 2.9.2 is SystemChat 2.0 - a dataset designed to teach Dolphin to obey the system prompt, even over a long conversation.
36
+
37
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/z1u6U91tL-H__7JCDbWys.png)
38
+
39
+ My appreciation for the sponsors of Dolphin 2.9.2:
40
+ - [Crusoe Cloud](https://crusoe.ai/) - provided an excellent on-demand 8xH100 node
41
+ - [OnDemand](https://on-demand.io/) - provided inference sponsorship, enabling creation of SystemChat
42
+
43
+ This model is based on Dolphin-2.9-Mixtral-8x22b, and is Apache-2.0 licensed.
44
+
45
+ The base model has a 64k context, and fine-tuning used a 16k sequence length.
46
+
47
+ Training took 1 week on an 8xH100 node provided by Crusoe Cloud.
48
+
49
+ This model was trained with full fine-tuning (FFT) on 50% of the parameters (targeted with [Laser Scanner](https://github.com/cognitivecomputations/laserRMT/blob/main/laser_scanner.py) by Fernando Fernandes, David Golchinfar, Lucas Atkins, and Eric Hartford), using the ChatML prompt template format.
50
+
51
+ example:
52
+
53
+ ```
54
+ <|im_start|>system
55
+ You are Dolphin, a helpful AI assistant.<|im_end|>
56
+ <|im_start|>user
57
+ {prompt}<|im_end|>
58
+ <|im_start|>assistant
59
+
60
+ ```
61
+
62
+ Dolphin-2.9 has a variety of instruction, conversational, and coding skills. It also has initial agentic abilities and supports function calling.
63
+
64
+ Dolphin is uncensored. I have filtered the dataset to remove alignment and bias. This makes the model more compliant. You are advised to implement your own alignment layer before exposing the model as a service. It will be highly compliant with any requests, even unethical ones. Please read my blog post about uncensored models. https://erichartford.com/uncensored-models You are responsible for any content you create using this model. Enjoy responsibly.
65
+
66
+ Dolphin is licensed Apache 2.0. I grant permission for any use, including commercial, that is in accordance with the Apache-2.0 license. Dolphin was trained on data generated from GPT4, among other models.
67
+
68
+ ## Evals
69
+
70
+ ![image/png](https://cdn-uploads.huggingface.co/production/uploads/63111b2d88942700629f5771/SDWV3SvJ8xR1gjl1z0LyO.png)
71
+
72
+ ## Training
added_tokens.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "<|im_end|>": 32000,
3
+ "<|im_start|>": 32001
4
+ }
config.json ADDED
@@ -0,0 +1,42 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "mistralai/Mixtral-8x22B-v0.1",
3
+ "architectures": [
4
+ "MixtralForCausalLM"
5
+ ],
6
+ "attention_dropout": 0.0,
7
+ "bos_token_id": 1,
8
+ "eos_token_id": 32000,
9
+ "hidden_act": "silu",
10
+ "hidden_size": 6144,
11
+ "initializer_range": 0.02,
12
+ "intermediate_size": 16384,
13
+ "max_position_embeddings": 65536,
14
+ "model_type": "mixtral",
15
+ "num_attention_heads": 48,
16
+ "num_experts_per_tok": 2,
17
+ "num_hidden_layers": 56,
18
+ "num_key_value_heads": 8,
19
+ "num_local_experts": 8,
20
+ "output_router_logits": false,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_theta": 1000000,
23
+ "router_aux_loss_coef": 0.001,
24
+ "router_jitter_noise": 0.0,
25
+ "sliding_window": null,
26
+ "tie_word_embeddings": false,
27
+ "torch_dtype": "bfloat16",
28
+ "transformers_version": "4.40.2",
29
+ "use_cache": false,
30
+ "vocab_size": 32002,
31
+ "quantization_config": {
32
+ "quant_method": "exl2",
33
+ "version": "0.1.5",
34
+ "bits": 6.0,
35
+ "head_bits": 8,
36
+ "calibration": {
37
+ "rows": 100,
38
+ "length": 2048,
39
+ "dataset": "(default)"
40
+ }
41
+ }
42
+ }
generation_config.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "bos_token_id": 1,
4
+ "do_sample": true,
5
+ "eos_token_id": 32000,
6
+ "transformers_version": "4.40.2"
7
+ }
latest ADDED
@@ -0,0 +1 @@
 
 
1
+ global_step1442
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
output-00001-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b550b55841d02fa97f11f642f33caf86ef3b1299d05f454e7df00fbbc280baf4
3
+ size 8551463272
output-00002-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:66958e27e9aee095ec88beb135d4af6c634ddc41a7797c4d55056b56e570c5a2
3
+ size 8577695840
output-00003-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9d25926d5a73bf6d5bf15ecb613cf172490b86b52f5365e056885c24d21c9a7f
3
+ size 8522366520
output-00004-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:80da2a7e2b2942bf79d2a25a9ab23a15681d7913c8225c92abb87b93285ce0d5
3
+ size 8556838216
output-00005-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6956bf35098e1a5498180a4ea47fe7fe3427f645bc90b012092d5e1b2d423255
3
+ size 8526946352
output-00006-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd8c6f6d2fa9abb1164768ff0e610fb30ae0aefd7d2984ec1c6791509e6038a0
3
+ size 8566472968
output-00007-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a358fe3f45c806f4d0c6b7de3685fd040167949aa4e2200ac22f4ec9e2c57389
3
+ size 8521491120
output-00008-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5800c4d900827ce0132a10cc011b07862d2652452b61aceb363c16f9c16a2f5f
3
+ size 8573269928
output-00009-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:63e9ed4240a428a27cf0ef832a951924ac3a57ac26893e059639485f3ccada95
3
+ size 8521496112
output-00010-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:ba3878dd25970ad8f2b0e4d01f8f0a9dfc14541360ba5b0015eac83dfa29a01d
3
+ size 8581784848
output-00011-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:27446a79aa4f21a8170815ee562cd6c7f5b60eff525cd13db11d9bee5fae0a30
3
+ size 8537766600
output-00012-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a99406f59b6eda267a9569bcfb6aeb7d0b5aadd1b576adfc87a4d5fe58b3812d
3
+ size 8571406040
output-00013-of-00013.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3f9bcdc9e7922ce93332d5937b975f82693de8888dc0edd87a60bb395d3fced
3
+ size 3125931080
special_tokens_map.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bos_token": {
3
+ "content": "<s>",
4
+ "lstrip": false,
5
+ "normalized": false,
6
+ "rstrip": false,
7
+ "single_word": false
8
+ },
9
+ "eos_token": {
10
+ "content": "<|im_end|>",
11
+ "lstrip": false,
12
+ "normalized": false,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "pad_token": "</s>",
17
+ "unk_token": {
18
+ "content": "<unk>",
19
+ "lstrip": false,
20
+ "normalized": false,
21
+ "rstrip": false,
22
+ "single_word": false
23
+ }
24
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dadfd56d766715c61d2ef780a525ab43b8e6da4de6865bda3d95fdef5e134055
3
+ size 493443
tokenizer_config.json ADDED
@@ -0,0 +1,59 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": true,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": false,
9
+ "rstrip": false,
10
+ "single_word": false,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": false,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": false,
27
+ "special": true
28
+ },
29
+ "32000": {
30
+ "content": "<|im_end|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "32001": {
38
+ "content": "<|im_start|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": false
44
+ }
45
+ },
46
+ "additional_special_tokens": [],
47
+ "bos_token": "<s>",
48
+ "chat_template": "{% if not add_generation_prompt is defined %}{% set add_generation_prompt = false %}{% endif %}{% for message in messages %}{{'<|im_start|>' + message['role'] + '\n' + message['content'] + '<|im_end|>' + '\n'}}{% endfor %}{% if add_generation_prompt %}{{ '<|im_start|>assistant\n' }}{% endif %}",
49
+ "clean_up_tokenization_spaces": false,
50
+ "eos_token": "<|im_end|>",
51
+ "legacy": true,
52
+ "model_max_length": 1000000000000000019884624838656,
53
+ "pad_token": "</s>",
54
+ "sp_model_kwargs": {},
55
+ "spaces_between_special_tokens": false,
56
+ "tokenizer_class": "LlamaTokenizer",
57
+ "unk_token": "<unk>",
58
+ "use_default_system_prompt": false
59
+ }