DewEfresh committed on
Commit
db2bead
·
verified ·
1 Parent(s): 79b67d7

Upload folder using huggingface_hub

Browse files
README.md ADDED
@@ -0,0 +1,177 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ base_model:
3
+ - m-a-p/neo_7b
4
+ - m-a-p/neo_7b
5
+ tags:
6
+ - merge
7
+ - mergekit
8
+ - lazymergekit
9
+ - m-a-p/neo_7b
10
+ ---
11
+
12
+ # Neo_7b-merge4
13
+
14
+ Neo_7b-merge4 is a merge of the following models using [LazyMergekit](https://colab.research.google.com/drive/1obulZ1ROXHjYLn6PPZJwRR6GzgQogxxb?usp=sharing):
15
+ * [m-a-p/neo_7b](https://huggingface.co/m-a-p/neo_7b)
16
+ * [m-a-p/neo_7b](https://huggingface.co/m-a-p/neo_7b)
17
+
18
+ ## 🧩 Configuration
19
+
20
+ ```yaml
21
+ slices:
22
+ # Group 1
23
+ - sources:
24
+ - model: m-a-p/neo_7b
25
+ layer_range: [0, 0]
26
+ - model: m-a-p/neo_7b
27
+ layer_range: [3, 3]
28
+ - sources:
29
+ - model: m-a-p/neo_7b
30
+ layer_range: [1, 1]
31
+ - model: m-a-p/neo_7b
32
+ layer_range: [3, 3]
33
+ - sources:
34
+ - model: m-a-p/neo_7b
35
+ layer_range: [2, 2]
36
+ - model: m-a-p/neo_7b
37
+ layer_range: [3, 3]
38
+ # Group 2
39
+ - sources:
40
+ - model: m-a-p/neo_7b
41
+ layer_range: [4, 4]
42
+ - model: m-a-p/neo_7b
43
+ layer_range: [7, 7]
44
+ - sources:
45
+ - model: m-a-p/neo_7b
46
+ layer_range: [5, 5]
47
+ - model: m-a-p/neo_7b
48
+ layer_range: [7, 7]
49
+ - sources:
50
+ - model: m-a-p/neo_7b
51
+ layer_range: [6, 6]
52
+ - model: m-a-p/neo_7b
53
+ layer_range: [7, 7]
54
+ # Group 3
55
+ - sources:
56
+ - model: m-a-p/neo_7b
57
+ layer_range: [8, 8]
58
+ - model: m-a-p/neo_7b
59
+ layer_range: [11, 11]
60
+ - sources:
61
+ - model: m-a-p/neo_7b
62
+ layer_range: [9, 9]
63
+ - model: m-a-p/neo_7b
64
+ layer_range: [11, 11]
65
+ - sources:
66
+ - model: m-a-p/neo_7b
67
+ layer_range: [10, 10]
68
+ - model: m-a-p/neo_7b
69
+ layer_range: [11, 11]
70
+ # Group 4
71
+ - sources:
72
+ - model: m-a-p/neo_7b
73
+ layer_range: [12, 12]
74
+ - model: m-a-p/neo_7b
75
+ layer_range: [15, 15]
76
+ - sources:
77
+ - model: m-a-p/neo_7b
78
+ layer_range: [13, 13]
79
+ - model: m-a-p/neo_7b
80
+ layer_range: [15, 15]
81
+ - sources:
82
+ - model: m-a-p/neo_7b
83
+ layer_range: [14, 14]
84
+ - model: m-a-p/neo_7b
85
+ layer_range: [15, 15]
86
+ # Group 5
87
+ - sources:
88
+ - model: m-a-p/neo_7b
89
+ layer_range: [16, 16]
90
+ - model: m-a-p/neo_7b
91
+ layer_range: [19, 19]
92
+ - sources:
93
+ - model: m-a-p/neo_7b
94
+ layer_range: [17, 17]
95
+ - model: m-a-p/neo_7b
96
+ layer_range: [19, 19]
97
+ - sources:
98
+ - model: m-a-p/neo_7b
99
+ layer_range: [18, 18]
100
+ - model: m-a-p/neo_7b
101
+ layer_range: [19, 19]
102
+ # Group 6
103
+ - sources:
104
+ - model: m-a-p/neo_7b
105
+ layer_range: [20, 20]
106
+ - model: m-a-p/neo_7b
107
+ layer_range: [23, 23]
108
+ - sources:
109
+ - model: m-a-p/neo_7b
110
+ layer_range: [21, 21]
111
+ - model: m-a-p/neo_7b
112
+ layer_range: [23, 23]
113
+ - sources:
114
+ - model: m-a-p/neo_7b
115
+ layer_range: [22, 22]
116
+ - model: m-a-p/neo_7b
117
+ layer_range: [23, 23]
118
+ # Group 7 (last group)
119
+ - sources:
120
+ - model: m-a-p/neo_7b
121
+ layer_range: [24, 24]
122
+ - model: m-a-p/neo_7b
123
+ layer_range: [27, 27]
124
+ - sources:
125
+ - model: m-a-p/neo_7b
126
+ layer_range: [25, 25]
127
+ - model: m-a-p/neo_7b
128
+ layer_range: [27, 27]
129
+ - sources:
130
+ - model: m-a-p/neo_7b
131
+ layer_range: [26, 26]
132
+ - model: m-a-p/neo_7b
133
+ layer_range: [27, 27]
134
+ merge_method: slerp
135
+ base_model: m-a-p/neo_7b
136
+ parameters:
137
+ t: 0.3333 # slerp interpolation factor: blend about 1/3 of each group's 4th layer into each of that group's first 3 layers
138
+ dtype: bfloat16
139
+ output_path: ./merged_redistributed_neo_7b
140
+ model_config:
141
+ num_hidden_layers: 21
142
+ attention_bias: false
143
+ attention_dropout: 0.0
144
+ hidden_act: "silu"
145
+ hidden_size: 3072
146
+ intermediate_size: 24576
147
+ num_attention_heads: 16
148
+ num_key_value_heads: 16
149
+ rms_norm_eps: 1e-05
150
+ rope_theta: 10000.0
151
+ use_cache: true
152
+ ```
153
+
154
+ ## 💻 Usage
155
+
156
+ ```python
157
+ !pip install -qU transformers accelerate
158
+
159
+ from transformers import AutoTokenizer
160
+ import transformers
161
+ import torch
162
+
163
+ model = "DewEfresh/Neo_7b-merge4"
164
+ messages = [{"role": "user", "content": "What is a large language model?"}]
165
+
166
+ tokenizer = AutoTokenizer.from_pretrained(model)
167
+ prompt = tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=True)
168
+ pipeline = transformers.pipeline(
169
+ "text-generation",
170
+ model=model,
171
+ torch_dtype=torch.float16,
172
+ device_map="auto",
173
+ )
174
+
175
+ outputs = pipeline(prompt, max_new_tokens=256, do_sample=True, temperature=0.7, top_k=50, top_p=0.95)
176
+ print(outputs[0]["generated_text"])
177
+ ```
added_tokens.json ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ {
2
+ "<|CLS|>": 64000,
3
+ "<|EOD|>": 64002,
4
+ "<|MASK|>": 64003,
5
+ "<|PAD|>": 64004,
6
+ "<|SEP|>": 64001
7
+ }
config.json ADDED
@@ -0,0 +1,29 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "m-a-p/neo_7b",
3
+ "architectures": [
4
+ "LlamaForCausalLM"
5
+ ],
6
+ "attention_bias": false,
7
+ "attention_dropout": 0.0,
8
+ "bos_token_id": 1,
9
+ "eos_token_id": 2,
10
+ "hidden_act": "silu",
11
+ "hidden_size": 3072,
12
+ "initializer_range": 0.02,
13
+ "intermediate_size": 24576,
14
+ "max_position_embeddings": 8192,
15
+ "mlp_bias": false,
16
+ "model_type": "llama",
17
+ "num_attention_heads": 16,
18
+ "num_hidden_layers": 0,
19
+ "num_key_value_heads": 16,
20
+ "pretraining_tp": 1,
21
+ "rms_norm_eps": 1e-05,
22
+ "rope_scaling": null,
23
+ "rope_theta": 10000.0,
24
+ "tie_word_embeddings": false,
25
+ "torch_dtype": "bfloat16",
26
+ "transformers_version": "4.42.3",
27
+ "use_cache": true,
28
+ "vocab_size": 64256
29
+ }
mergekit_config.yml ADDED
@@ -0,0 +1,132 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+
2
+ slices:
3
+ # Group 1
4
+ - sources:
5
+ - model: m-a-p/neo_7b
6
+ layer_range: [0, 0]
7
+ - model: m-a-p/neo_7b
8
+ layer_range: [3, 3]
9
+ - sources:
10
+ - model: m-a-p/neo_7b
11
+ layer_range: [1, 1]
12
+ - model: m-a-p/neo_7b
13
+ layer_range: [3, 3]
14
+ - sources:
15
+ - model: m-a-p/neo_7b
16
+ layer_range: [2, 2]
17
+ - model: m-a-p/neo_7b
18
+ layer_range: [3, 3]
19
+ # Group 2
20
+ - sources:
21
+ - model: m-a-p/neo_7b
22
+ layer_range: [4, 4]
23
+ - model: m-a-p/neo_7b
24
+ layer_range: [7, 7]
25
+ - sources:
26
+ - model: m-a-p/neo_7b
27
+ layer_range: [5, 5]
28
+ - model: m-a-p/neo_7b
29
+ layer_range: [7, 7]
30
+ - sources:
31
+ - model: m-a-p/neo_7b
32
+ layer_range: [6, 6]
33
+ - model: m-a-p/neo_7b
34
+ layer_range: [7, 7]
35
+ # Group 3
36
+ - sources:
37
+ - model: m-a-p/neo_7b
38
+ layer_range: [8, 8]
39
+ - model: m-a-p/neo_7b
40
+ layer_range: [11, 11]
41
+ - sources:
42
+ - model: m-a-p/neo_7b
43
+ layer_range: [9, 9]
44
+ - model: m-a-p/neo_7b
45
+ layer_range: [11, 11]
46
+ - sources:
47
+ - model: m-a-p/neo_7b
48
+ layer_range: [10, 10]
49
+ - model: m-a-p/neo_7b
50
+ layer_range: [11, 11]
51
+ # Group 4
52
+ - sources:
53
+ - model: m-a-p/neo_7b
54
+ layer_range: [12, 12]
55
+ - model: m-a-p/neo_7b
56
+ layer_range: [15, 15]
57
+ - sources:
58
+ - model: m-a-p/neo_7b
59
+ layer_range: [13, 13]
60
+ - model: m-a-p/neo_7b
61
+ layer_range: [15, 15]
62
+ - sources:
63
+ - model: m-a-p/neo_7b
64
+ layer_range: [14, 14]
65
+ - model: m-a-p/neo_7b
66
+ layer_range: [15, 15]
67
+ # Group 5
68
+ - sources:
69
+ - model: m-a-p/neo_7b
70
+ layer_range: [16, 16]
71
+ - model: m-a-p/neo_7b
72
+ layer_range: [19, 19]
73
+ - sources:
74
+ - model: m-a-p/neo_7b
75
+ layer_range: [17, 17]
76
+ - model: m-a-p/neo_7b
77
+ layer_range: [19, 19]
78
+ - sources:
79
+ - model: m-a-p/neo_7b
80
+ layer_range: [18, 18]
81
+ - model: m-a-p/neo_7b
82
+ layer_range: [19, 19]
83
+ # Group 6
84
+ - sources:
85
+ - model: m-a-p/neo_7b
86
+ layer_range: [20, 20]
87
+ - model: m-a-p/neo_7b
88
+ layer_range: [23, 23]
89
+ - sources:
90
+ - model: m-a-p/neo_7b
91
+ layer_range: [21, 21]
92
+ - model: m-a-p/neo_7b
93
+ layer_range: [23, 23]
94
+ - sources:
95
+ - model: m-a-p/neo_7b
96
+ layer_range: [22, 22]
97
+ - model: m-a-p/neo_7b
98
+ layer_range: [23, 23]
99
+ # Group 7 (last group)
100
+ - sources:
101
+ - model: m-a-p/neo_7b
102
+ layer_range: [24, 24]
103
+ - model: m-a-p/neo_7b
104
+ layer_range: [27, 27]
105
+ - sources:
106
+ - model: m-a-p/neo_7b
107
+ layer_range: [25, 25]
108
+ - model: m-a-p/neo_7b
109
+ layer_range: [27, 27]
110
+ - sources:
111
+ - model: m-a-p/neo_7b
112
+ layer_range: [26, 26]
113
+ - model: m-a-p/neo_7b
114
+ layer_range: [27, 27]
115
+ merge_method: slerp
116
+ base_model: m-a-p/neo_7b
117
+ parameters:
118
+ t: 0.3333 # slerp interpolation factor: blend about 1/3 of each group's 4th layer into each of that group's first 3 layers
119
+ dtype: bfloat16
120
+ output_path: ./merged_redistributed_neo_7b
121
+ model_config:
122
+ num_hidden_layers: 21
123
+ attention_bias: false
124
+ attention_dropout: 0.0
125
+ hidden_act: "silu"
126
+ hidden_size: 3072
127
+ intermediate_size: 24576
128
+ num_attention_heads: 16
129
+ num_key_value_heads: 16
130
+ rms_norm_eps: 1e-05
131
+ rope_theta: 10000.0
132
+ use_cache: true
model-00001-of-00001.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:41209159383a6438fa1e73146df470aae582c40bf510ec2495a3f8780477ce87
3
+ size 789584192
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:93f2a7f3db85521671732789bb8fd1bb3ae3a7e0d33170ea530f53a25346fcdd
3
+ size 4998668592
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1227a57ac0fc74f5a2c223e5e41dd53ba09f50acbf6d63e5f166a8e1fc740c5e
3
+ size 4926336584
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5abc28b3d1b89d4077e831f565ea2b0794a20ade31c750e450a00c7ebc9327ea
3
+ size 4907455792
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:028abab236ed774f37153fc8d7c0dc2f87b34b3dc964d20ffab314a43f3d6706
3
+ size 226505520
model.safetensors.index.json ADDED
@@ -0,0 +1 @@
 
 
1
+ {"metadata": {"mergekit_version": "0.0.4.4", "total_size": 789583872}, "weight_map": {"lm_head.weight": "model-00001-of-00001.safetensors", "model.embed_tokens.weight": "model-00001-of-00001.safetensors", "model.norm.weight": "model-00001-of-00001.safetensors"}}
special_tokens_map.json ADDED
@@ -0,0 +1,37 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ "<|CLS|>",
4
+ "<|SEP|>",
5
+ "<|EOD|>",
6
+ "<|MASK|>",
7
+ "<|PAD|>"
8
+ ],
9
+ "bos_token": {
10
+ "content": "<s>",
11
+ "lstrip": false,
12
+ "normalized": true,
13
+ "rstrip": false,
14
+ "single_word": false
15
+ },
16
+ "eos_token": {
17
+ "content": "</s>",
18
+ "lstrip": false,
19
+ "normalized": true,
20
+ "rstrip": false,
21
+ "single_word": true
22
+ },
23
+ "pad_token": {
24
+ "content": "<unk>",
25
+ "lstrip": false,
26
+ "normalized": true,
27
+ "rstrip": false,
28
+ "single_word": true
29
+ },
30
+ "unk_token": {
31
+ "content": "<unk>",
32
+ "lstrip": false,
33
+ "normalized": true,
34
+ "rstrip": false,
35
+ "single_word": true
36
+ }
37
+ }
tokenizer.model ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f6a2447b0e5664cabb2481587597102d82f42f0ccb7ef22e1c2d95494a8b03c5
3
+ size 1002561
tokenizer_config.json ADDED
@@ -0,0 +1,95 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_bos_token": false,
3
+ "add_eos_token": false,
4
+ "added_tokens_decoder": {
5
+ "0": {
6
+ "content": "<unk>",
7
+ "lstrip": false,
8
+ "normalized": true,
9
+ "rstrip": false,
10
+ "single_word": true,
11
+ "special": true
12
+ },
13
+ "1": {
14
+ "content": "<s>",
15
+ "lstrip": false,
16
+ "normalized": true,
17
+ "rstrip": false,
18
+ "single_word": false,
19
+ "special": true
20
+ },
21
+ "2": {
22
+ "content": "</s>",
23
+ "lstrip": false,
24
+ "normalized": true,
25
+ "rstrip": false,
26
+ "single_word": true,
27
+ "special": true
28
+ },
29
+ "64000": {
30
+ "content": "<|CLS|>",
31
+ "lstrip": false,
32
+ "normalized": false,
33
+ "rstrip": false,
34
+ "single_word": false,
35
+ "special": true
36
+ },
37
+ "64001": {
38
+ "content": "<|SEP|>",
39
+ "lstrip": false,
40
+ "normalized": false,
41
+ "rstrip": false,
42
+ "single_word": false,
43
+ "special": true
44
+ },
45
+ "64002": {
46
+ "content": "<|EOD|>",
47
+ "lstrip": false,
48
+ "normalized": false,
49
+ "rstrip": false,
50
+ "single_word": false,
51
+ "special": true
52
+ },
53
+ "64003": {
54
+ "content": "<|MASK|>",
55
+ "lstrip": false,
56
+ "normalized": false,
57
+ "rstrip": false,
58
+ "single_word": false,
59
+ "special": true
60
+ },
61
+ "64004": {
62
+ "content": "<|PAD|>",
63
+ "lstrip": false,
64
+ "normalized": false,
65
+ "rstrip": false,
66
+ "single_word": false,
67
+ "special": true
68
+ }
69
+ },
70
+ "additional_special_tokens": [
71
+ "<|CLS|>",
72
+ "<|SEP|>",
73
+ "<|EOD|>",
74
+ "<|MASK|>",
75
+ "<|PAD|>"
76
+ ],
77
+ "auto_map": {
78
+ "AutoTokenizer": [
79
+ "m-a-p/neo_7b--tokenization_neo.NEOTokenizer",
80
+ null
81
+ ]
82
+ },
83
+ "bos_token": "<s>",
84
+ "chat_template": "{% set system_message = 'You are a helpful, respectful and honest assistant. Always answer as helpfully as possible, while being safe. Your answers should not include any harmful, unethical, racist, sexist, toxic, dangerous, or illegal content. Please ensure that your responses are socially unbiased and positive in nature.\\n\\nIf a question does not make any sense, or is not factually coherent, explain why instead of answering something not correct. If you don\\'t know the answer to a question, please don\\'t share false information.' %}{% if messages[0]['role'] == 'system' %}{% set system_message = messages[0]['content'] %}{% endif %}{% for message in messages %}{% set content = message['content'] %}{% if loop.index0 == 0 and system_message is defined %}{% set content = '<<SYS>>\\n' + system_message + '\\n<</SYS>>\\n\\n' + message['content'] %}{% endif %}{% if message['role'] == 'user' %}{{ '<s>' + '[INST] ' + content + ' [/INST]' }}{% elif message['role'] == 'assistant' %}{{ content + '</s>' }}{% endif %}{% endfor %}",
85
+ "clean_up_tokenization_spaces": false,
86
+ "eos_token": "</s>",
87
+ "model_max_length": 4096,
88
+ "pad_token": "<unk>",
89
+ "padding_side": "right",
90
+ "sp_model_kwargs": {},
91
+ "split_special_tokens": false,
92
+ "tokenizer_class": "NEOTokenizer",
93
+ "unk_token": "<unk>",
94
+ "use_fast": false
95
+ }