kechengcode committed
Commit 5666d5e · verified · 1 parent: 935e8fe

Upload folder using huggingface_hub

This view is limited to 50 files because the commit contains too many changes; see the raw diff for the complete change set.
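Because the web view is truncated, the full upload is easier to inspect programmatically. A minimal sketch, assuming the huggingface_hub Python package is installed; the repository id below is a placeholder, since it is not visible in this truncated view:

from huggingface_hub import HfApi, snapshot_download

REPO_ID = "kechengcode/<repo-name>"  # placeholder -- substitute the actual repository id
REVISION = "5666d5e"                 # the commit shown above

# List every file in the repository at this commit, not just the 50 rendered here.
for path in HfApi().list_repo_files(REPO_ID, revision=REVISION):
    print(path)

# Pull the whole folder (config, manifest, and all params_shard_*.bin weights) locally.
local_dir = snapshot_download(REPO_ID, revision=REVISION)
print("downloaded to", local_dir)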
mlc-chat-config.json ADDED
@@ -0,0 +1,90 @@
+ {
+   "version": "0.1.0",
+   "model_type": "llama",
+   "quantization": "q0f16",
+   "model_config": {
+     "hidden_size": 4096,
+     "intermediate_size": 14336,
+     "num_attention_heads": 32,
+     "num_hidden_layers": 16,
+     "rms_norm_eps": 1e-05,
+     "vocab_size": 128256,
+     "tie_word_embeddings": false,
+     "position_embedding_base": 500000.0,
+     "rope_scaling": {
+       "factor": 8.0,
+       "high_freq_factor": 4.0,
+       "low_freq_factor": 1.0,
+       "original_max_position_embeddings": 8192,
+       "rope_type": "llama3"
+     },
+     "context_window_size": 131072,
+     "prefill_chunk_size": 8192,
+     "num_key_value_heads": 8,
+     "head_dim": 128,
+     "tensor_parallel_shards": 1,
+     "pipeline_parallel_stages": 1,
+     "max_batch_size": 1
+   },
+   "vocab_size": 128256,
+   "context_window_size": 131072,
+   "sliding_window_size": -1,
+   "prefill_chunk_size": 8192,
+   "attention_sink_size": -1,
+   "tensor_parallel_shards": 1,
+   "pipeline_parallel_stages": 1,
+   "temperature": 0.6,
+   "presence_penalty": 0.0,
+   "frequency_penalty": 0.0,
+   "repetition_penalty": 1.0,
+   "top_p": 0.9,
+   "tokenizer_files": [
+     "tokenizer.json",
+     "tokenizer_config.json"
+   ],
+   "tokenizer_info": {
+     "token_postproc_method": "byte_level",
+     "prepend_space_in_encode": false,
+     "strip_space_in_decode": false
+   },
+   "conv_template": {
+     "name": "llama-3_1",
+     "system_template": "<|start_header_id|>system<|end_header_id|>\n\n{system_message}<|eot_id|>",
+     "system_message": "You are a helpful, respectful and honest assistant.",
+     "system_prefix_token_ids": [
+       128000
+     ],
+     "add_role_after_system_message": true,
+     "roles": {
+       "user": "<|start_header_id|>user",
+       "assistant": "<|start_header_id|>assistant",
+       "tool": "<|start_header_id|>ipython"
+     },
+     "role_templates": {
+       "user": "{user_message}",
+       "assistant": "{assistant_message}",
+       "tool": "{tool_message}"
+     },
+     "messages": [],
+     "seps": [
+       "<|eot_id|>"
+     ],
+     "role_content_sep": "<|end_header_id|>\n\n",
+     "role_empty_sep": "<|end_header_id|>\n\n",
+     "stop_str": [],
+     "stop_token_ids": [
+       128001,
+       128008,
+       128009
+     ],
+     "function_string": "",
+     "use_function_calling": false
+   },
+   "pad_token_id": 0,
+   "bos_token_id": 128000,
+   "eos_token_id": [
+     128001,
+     128008,
+     128009
+   ]
+ }
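The config above describes a 16-layer, Llama-3.1-style model kept unquantized in fp16 ("q0f16"), with grouped-query attention (32 query heads over 8 KV heads) and llama3-type RoPE scaling (factor 8 over an original 8192-token window) behind a 131072-token context window. A quick sanity-check sketch, stdlib only, assuming mlc-chat-config.json has been downloaded to the working directory; it also shows roughly how the conv_template fields assemble a prompt (the BOS id 128000 from system_prefix_token_ids is prepended at the token level):

import json

with open("mlc-chat-config.json") as f:
    cfg = json.load(f)

m = cfg["model_config"]
gqa_groups = m["num_attention_heads"] // m["num_key_value_heads"]   # 32 // 8 = 4
kv_cache_bytes_per_token = (
    2 * m["num_hidden_layers"] * m["num_key_value_heads"] * m["head_dim"] * 2
)  # K and V, 16 layers, 8 KV heads, 128 dims per head, 2 bytes per fp16 element
print("GQA groups:", gqa_groups)
print("fp16 KV cache per token:", kv_cache_bytes_per_token // 1024, "KiB")  # 64 KiB

# Rough prompt assembly from conv_template for a single user turn.
conv = cfg["conv_template"]
prompt = (
    conv["system_template"].replace("{system_message}", conv["system_message"])
    + conv["roles"]["user"] + conv["role_content_sep"]
    + "Hello!" + conv["seps"][0]
    + conv["roles"]["assistant"] + conv["role_empty_sep"]
)
print(prompt)

With 8 KV heads instead of 32, the KV cache is a quarter of what full multi-head attention would need; even so, at the full 131072-token context that is roughly 8 GiB per sequence in fp16.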
ndarray-cache.json ADDED
@@ -0,0 +1,1601 @@
1
+ {
2
+ "metadata": {
3
+ "ParamSize": 99,
4
+ "ParamBytes": 9080938496.0,
5
+ "BitsPerParam": 16.0
6
+ },
7
+ "records": [
8
+ {
9
+ "dataPath": "params_shard_0.bin",
10
+ "format": "raw-shard",
11
+ "nbytes": 1050673152,
12
+ "records": [
13
+ {
14
+ "name": "lm_head.weight",
15
+ "shape": [
16
+ 128256,
17
+ 4096
18
+ ],
19
+ "dtype": "float16",
20
+ "format": "f32-to-bf16",
21
+ "nbytes": 1050673152,
22
+ "byteOffset": 0
23
+ }
24
+ ],
25
+ "md5sum": "75edd591cd36aeac9b51ef921ce49e6d"
26
+ },
27
+ {
28
+ "dataPath": "params_shard_1.bin",
29
+ "format": "raw-shard",
30
+ "nbytes": 117440512,
31
+ "records": [
32
+ {
33
+ "name": "model.layers.10.mlp.down_proj.weight",
34
+ "shape": [
35
+ 4096,
36
+ 14336
37
+ ],
38
+ "dtype": "float16",
39
+ "format": "f32-to-bf16",
40
+ "nbytes": 117440512,
41
+ "byteOffset": 0
42
+ }
43
+ ],
44
+ "md5sum": "882adac81ef5715c534ae6b6dfca6a8a"
45
+ },
46
+ {
47
+ "dataPath": "params_shard_2.bin",
48
+ "format": "raw-shard",
49
+ "nbytes": 234881024,
50
+ "records": [
51
+ {
52
+ "name": "model.layers.10.mlp.gate_up_proj.weight",
53
+ "shape": [
54
+ 28672,
55
+ 4096
56
+ ],
57
+ "dtype": "float16",
58
+ "format": "f32-to-bf16",
59
+ "nbytes": 234881024,
60
+ "byteOffset": 0
61
+ }
62
+ ],
63
+ "md5sum": "6685f6e417a8f6032e1e648685682421"
64
+ },
65
+ {
66
+ "dataPath": "params_shard_3.bin",
67
+ "format": "raw-shard",
68
+ "nbytes": 50331648,
69
+ "records": [
70
+ {
71
+ "name": "model.layers.10.self_attn.qkv_proj.weight",
72
+ "shape": [
73
+ 6144,
74
+ 4096
75
+ ],
76
+ "dtype": "float16",
77
+ "format": "f32-to-bf16",
78
+ "nbytes": 50331648,
79
+ "byteOffset": 0
80
+ }
81
+ ],
82
+ "md5sum": "bb0d281ced9faf26f31bef662e487446"
83
+ },
84
+ {
85
+ "dataPath": "params_shard_4.bin",
86
+ "format": "raw-shard",
87
+ "nbytes": 33554432,
88
+ "records": [
89
+ {
90
+ "name": "model.layers.10.self_attn.o_proj.weight",
91
+ "shape": [
92
+ 4096,
93
+ 4096
94
+ ],
95
+ "dtype": "float16",
96
+ "format": "f32-to-bf16",
97
+ "nbytes": 33554432,
98
+ "byteOffset": 0
99
+ }
100
+ ],
101
+ "md5sum": "c64e84066f502e276d6e632d89deb55e"
102
+ },
103
+ {
104
+ "dataPath": "params_shard_5.bin",
105
+ "format": "raw-shard",
106
+ "nbytes": 117440512,
107
+ "records": [
108
+ {
109
+ "name": "model.layers.11.mlp.down_proj.weight",
110
+ "shape": [
111
+ 4096,
112
+ 14336
113
+ ],
114
+ "dtype": "float16",
115
+ "format": "f32-to-bf16",
116
+ "nbytes": 117440512,
117
+ "byteOffset": 0
118
+ }
119
+ ],
120
+ "md5sum": "53e53d7d42ea7237d58db0c90207f321"
121
+ },
122
+ {
123
+ "dataPath": "params_shard_6.bin",
124
+ "format": "raw-shard",
125
+ "nbytes": 234881024,
126
+ "records": [
127
+ {
128
+ "name": "model.layers.11.mlp.gate_up_proj.weight",
129
+ "shape": [
130
+ 28672,
131
+ 4096
132
+ ],
133
+ "dtype": "float16",
134
+ "format": "f32-to-bf16",
135
+ "nbytes": 234881024,
136
+ "byteOffset": 0
137
+ }
138
+ ],
139
+ "md5sum": "b4cafce0dd2ad86578f5ef0c598d767d"
140
+ },
141
+ {
142
+ "dataPath": "params_shard_7.bin",
143
+ "format": "raw-shard",
144
+ "nbytes": 50331648,
145
+ "records": [
146
+ {
147
+ "name": "model.layers.11.self_attn.qkv_proj.weight",
148
+ "shape": [
149
+ 6144,
150
+ 4096
151
+ ],
152
+ "dtype": "float16",
153
+ "format": "f32-to-bf16",
154
+ "nbytes": 50331648,
155
+ "byteOffset": 0
156
+ }
157
+ ],
158
+ "md5sum": "0ab8a1a37b3edf246e610db0b820407e"
159
+ },
160
+ {
161
+ "dataPath": "params_shard_8.bin",
162
+ "format": "raw-shard",
163
+ "nbytes": 33554432,
164
+ "records": [
165
+ {
166
+ "name": "model.layers.11.self_attn.o_proj.weight",
167
+ "shape": [
168
+ 4096,
169
+ 4096
170
+ ],
171
+ "dtype": "float16",
172
+ "format": "f32-to-bf16",
173
+ "nbytes": 33554432,
174
+ "byteOffset": 0
175
+ }
176
+ ],
177
+ "md5sum": "559c69b36e2dcb7288641032f85176a8"
178
+ },
179
+ {
180
+ "dataPath": "params_shard_9.bin",
181
+ "format": "raw-shard",
182
+ "nbytes": 117440512,
183
+ "records": [
184
+ {
185
+ "name": "model.layers.12.mlp.down_proj.weight",
186
+ "shape": [
187
+ 4096,
188
+ 14336
189
+ ],
190
+ "dtype": "float16",
191
+ "format": "f32-to-bf16",
192
+ "nbytes": 117440512,
193
+ "byteOffset": 0
194
+ }
195
+ ],
196
+ "md5sum": "74ace719a6dbb05c2eba2beaf4066d72"
197
+ },
198
+ {
199
+ "dataPath": "params_shard_10.bin",
200
+ "format": "raw-shard",
201
+ "nbytes": 234881024,
202
+ "records": [
203
+ {
204
+ "name": "model.layers.12.mlp.gate_up_proj.weight",
205
+ "shape": [
206
+ 28672,
207
+ 4096
208
+ ],
209
+ "dtype": "float16",
210
+ "format": "f32-to-bf16",
211
+ "nbytes": 234881024,
212
+ "byteOffset": 0
213
+ }
214
+ ],
215
+ "md5sum": "aeef8a609db53e108557bad1f10a3be7"
216
+ },
217
+ {
218
+ "dataPath": "params_shard_11.bin",
219
+ "format": "raw-shard",
220
+ "nbytes": 50331648,
221
+ "records": [
222
+ {
223
+ "name": "model.layers.12.self_attn.qkv_proj.weight",
224
+ "shape": [
225
+ 6144,
226
+ 4096
227
+ ],
228
+ "dtype": "float16",
229
+ "format": "f32-to-bf16",
230
+ "nbytes": 50331648,
231
+ "byteOffset": 0
232
+ }
233
+ ],
234
+ "md5sum": "9ff3eca975f628c98f48541c57fddf3b"
235
+ },
236
+ {
237
+ "dataPath": "params_shard_12.bin",
238
+ "format": "raw-shard",
239
+ "nbytes": 33554432,
240
+ "records": [
241
+ {
242
+ "name": "model.layers.12.self_attn.o_proj.weight",
243
+ "shape": [
244
+ 4096,
245
+ 4096
246
+ ],
247
+ "dtype": "float16",
248
+ "format": "f32-to-bf16",
249
+ "nbytes": 33554432,
250
+ "byteOffset": 0
251
+ }
252
+ ],
253
+ "md5sum": "41e28bb232e53fa15619a7103ef5e0d6"
254
+ },
255
+ {
256
+ "dataPath": "params_shard_13.bin",
257
+ "format": "raw-shard",
258
+ "nbytes": 117440512,
259
+ "records": [
260
+ {
261
+ "name": "model.layers.13.mlp.down_proj.weight",
262
+ "shape": [
263
+ 4096,
264
+ 14336
265
+ ],
266
+ "dtype": "float16",
267
+ "format": "f32-to-bf16",
268
+ "nbytes": 117440512,
269
+ "byteOffset": 0
270
+ }
271
+ ],
272
+ "md5sum": "6e75989b9677586f21f73591ae348135"
273
+ },
274
+ {
275
+ "dataPath": "params_shard_14.bin",
276
+ "format": "raw-shard",
277
+ "nbytes": 234881024,
278
+ "records": [
279
+ {
280
+ "name": "model.layers.13.mlp.gate_up_proj.weight",
281
+ "shape": [
282
+ 28672,
283
+ 4096
284
+ ],
285
+ "dtype": "float16",
286
+ "format": "f32-to-bf16",
287
+ "nbytes": 234881024,
288
+ "byteOffset": 0
289
+ }
290
+ ],
291
+ "md5sum": "1ca35e693290541b8b9ba1d149e7e429"
292
+ },
293
+ {
294
+ "dataPath": "params_shard_15.bin",
295
+ "format": "raw-shard",
296
+ "nbytes": 50331648,
297
+ "records": [
298
+ {
299
+ "name": "model.layers.13.self_attn.qkv_proj.weight",
300
+ "shape": [
301
+ 6144,
302
+ 4096
303
+ ],
304
+ "dtype": "float16",
305
+ "format": "f32-to-bf16",
306
+ "nbytes": 50331648,
307
+ "byteOffset": 0
308
+ }
309
+ ],
310
+ "md5sum": "fbbd78f04c1a0ac7dc7801fd432f0cfc"
311
+ },
312
+ {
313
+ "dataPath": "params_shard_16.bin",
314
+ "format": "raw-shard",
315
+ "nbytes": 33554432,
316
+ "records": [
317
+ {
318
+ "name": "model.layers.13.self_attn.o_proj.weight",
319
+ "shape": [
320
+ 4096,
321
+ 4096
322
+ ],
323
+ "dtype": "float16",
324
+ "format": "f32-to-bf16",
325
+ "nbytes": 33554432,
326
+ "byteOffset": 0
327
+ }
328
+ ],
329
+ "md5sum": "069e926e8a015214466100ff3e6ff924"
330
+ },
331
+ {
332
+ "dataPath": "params_shard_17.bin",
333
+ "format": "raw-shard",
334
+ "nbytes": 117440512,
335
+ "records": [
336
+ {
337
+ "name": "model.layers.14.mlp.down_proj.weight",
338
+ "shape": [
339
+ 4096,
340
+ 14336
341
+ ],
342
+ "dtype": "float16",
343
+ "format": "f32-to-bf16",
344
+ "nbytes": 117440512,
345
+ "byteOffset": 0
346
+ }
347
+ ],
348
+ "md5sum": "36913890da219b82c0535faa63141777"
349
+ },
350
+ {
351
+ "dataPath": "params_shard_18.bin",
352
+ "format": "raw-shard",
353
+ "nbytes": 234881024,
354
+ "records": [
355
+ {
356
+ "name": "model.layers.14.mlp.gate_up_proj.weight",
357
+ "shape": [
358
+ 28672,
359
+ 4096
360
+ ],
361
+ "dtype": "float16",
362
+ "format": "f32-to-bf16",
363
+ "nbytes": 234881024,
364
+ "byteOffset": 0
365
+ }
366
+ ],
367
+ "md5sum": "83fec9c67c69936a8eb7efab2edd755d"
368
+ },
369
+ {
370
+ "dataPath": "params_shard_19.bin",
371
+ "format": "raw-shard",
372
+ "nbytes": 50331648,
373
+ "records": [
374
+ {
375
+ "name": "model.layers.14.self_attn.qkv_proj.weight",
376
+ "shape": [
377
+ 6144,
378
+ 4096
379
+ ],
380
+ "dtype": "float16",
381
+ "format": "f32-to-bf16",
382
+ "nbytes": 50331648,
383
+ "byteOffset": 0
384
+ }
385
+ ],
386
+ "md5sum": "8095fd912c1622273c11fd2a971d244c"
387
+ },
388
+ {
389
+ "dataPath": "params_shard_20.bin",
390
+ "format": "raw-shard",
391
+ "nbytes": 33554432,
392
+ "records": [
393
+ {
394
+ "name": "model.layers.14.self_attn.o_proj.weight",
395
+ "shape": [
396
+ 4096,
397
+ 4096
398
+ ],
399
+ "dtype": "float16",
400
+ "format": "f32-to-bf16",
401
+ "nbytes": 33554432,
402
+ "byteOffset": 0
403
+ }
404
+ ],
405
+ "md5sum": "af1902028e36c5819e52592072ce67a5"
406
+ },
407
+ {
408
+ "dataPath": "params_shard_21.bin",
409
+ "format": "raw-shard",
410
+ "nbytes": 117440512,
411
+ "records": [
412
+ {
413
+ "name": "model.layers.15.mlp.down_proj.weight",
414
+ "shape": [
415
+ 4096,
416
+ 14336
417
+ ],
418
+ "dtype": "float16",
419
+ "format": "f32-to-bf16",
420
+ "nbytes": 117440512,
421
+ "byteOffset": 0
422
+ }
423
+ ],
424
+ "md5sum": "8189251b9267ac10b1df23d471730c45"
425
+ },
426
+ {
427
+ "dataPath": "params_shard_22.bin",
428
+ "format": "raw-shard",
429
+ "nbytes": 234881024,
430
+ "records": [
431
+ {
432
+ "name": "model.layers.15.mlp.gate_up_proj.weight",
433
+ "shape": [
434
+ 28672,
435
+ 4096
436
+ ],
437
+ "dtype": "float16",
438
+ "format": "f32-to-bf16",
439
+ "nbytes": 234881024,
440
+ "byteOffset": 0
441
+ }
442
+ ],
443
+ "md5sum": "7f1509e4cdc899ef0472fbeac943906a"
444
+ },
445
+ {
446
+ "dataPath": "params_shard_23.bin",
447
+ "format": "raw-shard",
448
+ "nbytes": 50331648,
449
+ "records": [
450
+ {
451
+ "name": "model.layers.15.self_attn.qkv_proj.weight",
452
+ "shape": [
453
+ 6144,
454
+ 4096
455
+ ],
456
+ "dtype": "float16",
457
+ "format": "f32-to-bf16",
458
+ "nbytes": 50331648,
459
+ "byteOffset": 0
460
+ }
461
+ ],
462
+ "md5sum": "43518c0a6fff6447442a6d89efa1ff50"
463
+ },
464
+ {
465
+ "dataPath": "params_shard_24.bin",
466
+ "format": "raw-shard",
467
+ "nbytes": 33554432,
468
+ "records": [
469
+ {
470
+ "name": "model.layers.15.self_attn.o_proj.weight",
471
+ "shape": [
472
+ 4096,
473
+ 4096
474
+ ],
475
+ "dtype": "float16",
476
+ "format": "f32-to-bf16",
477
+ "nbytes": 33554432,
478
+ "byteOffset": 0
479
+ }
480
+ ],
481
+ "md5sum": "ea9cf82232756ec2834bbdaa38b243c2"
482
+ },
483
+ {
484
+ "dataPath": "params_shard_25.bin",
485
+ "format": "raw-shard",
486
+ "nbytes": 117440512,
487
+ "records": [
488
+ {
489
+ "name": "model.layers.9.mlp.down_proj.weight",
490
+ "shape": [
491
+ 4096,
492
+ 14336
493
+ ],
494
+ "dtype": "float16",
495
+ "format": "f32-to-bf16",
496
+ "nbytes": 117440512,
497
+ "byteOffset": 0
498
+ }
499
+ ],
500
+ "md5sum": "55b04d0c593293dadb6f08db1bed6755"
501
+ },
502
+ {
503
+ "dataPath": "params_shard_26.bin",
504
+ "format": "raw-shard",
505
+ "nbytes": 234881024,
506
+ "records": [
507
+ {
508
+ "name": "model.layers.9.mlp.gate_up_proj.weight",
509
+ "shape": [
510
+ 28672,
511
+ 4096
512
+ ],
513
+ "dtype": "float16",
514
+ "format": "f32-to-bf16",
515
+ "nbytes": 234881024,
516
+ "byteOffset": 0
517
+ }
518
+ ],
519
+ "md5sum": "242c8c0fdbee5a54f4d1453f66a2b9cf"
520
+ },
521
+ {
522
+ "dataPath": "params_shard_27.bin",
523
+ "format": "raw-shard",
524
+ "nbytes": 50331648,
525
+ "records": [
526
+ {
527
+ "name": "model.layers.9.self_attn.qkv_proj.weight",
528
+ "shape": [
529
+ 6144,
530
+ 4096
531
+ ],
532
+ "dtype": "float16",
533
+ "format": "f32-to-bf16",
534
+ "nbytes": 50331648,
535
+ "byteOffset": 0
536
+ }
537
+ ],
538
+ "md5sum": "169e455da07f15b7bfd0f86b9e13920d"
539
+ },
540
+ {
541
+ "dataPath": "params_shard_28.bin",
542
+ "format": "raw-shard",
543
+ "nbytes": 33554432,
544
+ "records": [
545
+ {
546
+ "name": "model.layers.9.self_attn.o_proj.weight",
547
+ "shape": [
548
+ 4096,
549
+ 4096
550
+ ],
551
+ "dtype": "float16",
552
+ "format": "f32-to-bf16",
553
+ "nbytes": 33554432,
554
+ "byteOffset": 0
555
+ }
556
+ ],
557
+ "md5sum": "9dbb023c23e0b6b9b095eca81e6e6ffc"
558
+ },
559
+ {
560
+ "dataPath": "params_shard_29.bin",
561
+ "format": "raw-shard",
562
+ "nbytes": 1050673152,
563
+ "records": [
564
+ {
565
+ "name": "model.embed_tokens.weight",
566
+ "shape": [
567
+ 128256,
568
+ 4096
569
+ ],
570
+ "dtype": "float16",
571
+ "format": "f32-to-bf16",
572
+ "nbytes": 1050673152,
573
+ "byteOffset": 0
574
+ }
575
+ ],
576
+ "md5sum": "61a3075c1d56322e3b21589b5a40077a"
577
+ },
578
+ {
579
+ "dataPath": "params_shard_30.bin",
580
+ "format": "raw-shard",
581
+ "nbytes": 117440512,
582
+ "records": [
583
+ {
584
+ "name": "model.layers.0.mlp.down_proj.weight",
585
+ "shape": [
586
+ 4096,
587
+ 14336
588
+ ],
589
+ "dtype": "float16",
590
+ "format": "f32-to-bf16",
591
+ "nbytes": 117440512,
592
+ "byteOffset": 0
593
+ }
594
+ ],
595
+ "md5sum": "e38adc5f6cd900d6a4ddce51c418b621"
596
+ },
597
+ {
598
+ "dataPath": "params_shard_31.bin",
599
+ "format": "raw-shard",
600
+ "nbytes": 234881024,
601
+ "records": [
602
+ {
603
+ "name": "model.layers.0.mlp.gate_up_proj.weight",
604
+ "shape": [
605
+ 28672,
606
+ 4096
607
+ ],
608
+ "dtype": "float16",
609
+ "format": "f32-to-bf16",
610
+ "nbytes": 234881024,
611
+ "byteOffset": 0
612
+ }
613
+ ],
614
+ "md5sum": "2688b8c753a249db64bb8096cbb4ee46"
615
+ },
616
+ {
617
+ "dataPath": "params_shard_32.bin",
618
+ "format": "raw-shard",
619
+ "nbytes": 50331648,
620
+ "records": [
621
+ {
622
+ "name": "model.layers.0.self_attn.qkv_proj.weight",
623
+ "shape": [
624
+ 6144,
625
+ 4096
626
+ ],
627
+ "dtype": "float16",
628
+ "format": "f32-to-bf16",
629
+ "nbytes": 50331648,
630
+ "byteOffset": 0
631
+ }
632
+ ],
633
+ "md5sum": "bde37a0116316bc7a2edb954db041f9a"
634
+ },
635
+ {
636
+ "dataPath": "params_shard_33.bin",
637
+ "format": "raw-shard",
638
+ "nbytes": 33554432,
639
+ "records": [
640
+ {
641
+ "name": "model.layers.0.self_attn.o_proj.weight",
642
+ "shape": [
643
+ 4096,
644
+ 4096
645
+ ],
646
+ "dtype": "float16",
647
+ "format": "f32-to-bf16",
648
+ "nbytes": 33554432,
649
+ "byteOffset": 0
650
+ }
651
+ ],
652
+ "md5sum": "0751d8c096fb83e2449ad595823c329d"
653
+ },
654
+ {
655
+ "dataPath": "params_shard_34.bin",
656
+ "format": "raw-shard",
657
+ "nbytes": 117440512,
658
+ "records": [
659
+ {
660
+ "name": "model.layers.1.mlp.down_proj.weight",
661
+ "shape": [
662
+ 4096,
663
+ 14336
664
+ ],
665
+ "dtype": "float16",
666
+ "format": "f32-to-bf16",
667
+ "nbytes": 117440512,
668
+ "byteOffset": 0
669
+ }
670
+ ],
671
+ "md5sum": "01b027498d27b92bfcc84183e5d3d34c"
672
+ },
673
+ {
674
+ "dataPath": "params_shard_35.bin",
675
+ "format": "raw-shard",
676
+ "nbytes": 234881024,
677
+ "records": [
678
+ {
679
+ "name": "model.layers.1.mlp.gate_up_proj.weight",
680
+ "shape": [
681
+ 28672,
682
+ 4096
683
+ ],
684
+ "dtype": "float16",
685
+ "format": "f32-to-bf16",
686
+ "nbytes": 234881024,
687
+ "byteOffset": 0
688
+ }
689
+ ],
690
+ "md5sum": "47d49cf7c227f7486cad873bedebcc35"
691
+ },
692
+ {
693
+ "dataPath": "params_shard_36.bin",
694
+ "format": "raw-shard",
695
+ "nbytes": 50331648,
696
+ "records": [
697
+ {
698
+ "name": "model.layers.1.self_attn.qkv_proj.weight",
699
+ "shape": [
700
+ 6144,
701
+ 4096
702
+ ],
703
+ "dtype": "float16",
704
+ "format": "f32-to-bf16",
705
+ "nbytes": 50331648,
706
+ "byteOffset": 0
707
+ }
708
+ ],
709
+ "md5sum": "6c6ce14ce180475895eec3d83946c1cb"
710
+ },
711
+ {
712
+ "dataPath": "params_shard_37.bin",
713
+ "format": "raw-shard",
714
+ "nbytes": 33554432,
715
+ "records": [
716
+ {
717
+ "name": "model.layers.1.self_attn.o_proj.weight",
718
+ "shape": [
719
+ 4096,
720
+ 4096
721
+ ],
722
+ "dtype": "float16",
723
+ "format": "f32-to-bf16",
724
+ "nbytes": 33554432,
725
+ "byteOffset": 0
726
+ }
727
+ ],
728
+ "md5sum": "89605654f707dbe9ad46f348065b2a30"
729
+ },
730
+ {
731
+ "dataPath": "params_shard_38.bin",
732
+ "format": "raw-shard",
733
+ "nbytes": 117440512,
734
+ "records": [
735
+ {
736
+ "name": "model.layers.2.mlp.down_proj.weight",
737
+ "shape": [
738
+ 4096,
739
+ 14336
740
+ ],
741
+ "dtype": "float16",
742
+ "format": "f32-to-bf16",
743
+ "nbytes": 117440512,
744
+ "byteOffset": 0
745
+ }
746
+ ],
747
+ "md5sum": "36be3875e892660f5ce0130d3cf0e123"
748
+ },
749
+ {
750
+ "dataPath": "params_shard_39.bin",
751
+ "format": "raw-shard",
752
+ "nbytes": 234881024,
753
+ "records": [
754
+ {
755
+ "name": "model.layers.2.mlp.gate_up_proj.weight",
756
+ "shape": [
757
+ 28672,
758
+ 4096
759
+ ],
760
+ "dtype": "float16",
761
+ "format": "f32-to-bf16",
762
+ "nbytes": 234881024,
763
+ "byteOffset": 0
764
+ }
765
+ ],
766
+ "md5sum": "cbaebeea0c7212f5c3d155d623c373a4"
767
+ },
768
+ {
769
+ "dataPath": "params_shard_40.bin",
770
+ "format": "raw-shard",
771
+ "nbytes": 50331648,
772
+ "records": [
773
+ {
774
+ "name": "model.layers.2.self_attn.qkv_proj.weight",
775
+ "shape": [
776
+ 6144,
777
+ 4096
778
+ ],
779
+ "dtype": "float16",
780
+ "format": "f32-to-bf16",
781
+ "nbytes": 50331648,
782
+ "byteOffset": 0
783
+ }
784
+ ],
785
+ "md5sum": "a1d8e1d654e66a793c9570296a38120d"
786
+ },
787
+ {
788
+ "dataPath": "params_shard_41.bin",
789
+ "format": "raw-shard",
790
+ "nbytes": 33554432,
791
+ "records": [
792
+ {
793
+ "name": "model.layers.2.self_attn.o_proj.weight",
794
+ "shape": [
795
+ 4096,
796
+ 4096
797
+ ],
798
+ "dtype": "float16",
799
+ "format": "f32-to-bf16",
800
+ "nbytes": 33554432,
801
+ "byteOffset": 0
802
+ }
803
+ ],
804
+ "md5sum": "bfb9e277e026e401401d1f2ff74b0bb4"
805
+ },
806
+ {
807
+ "dataPath": "params_shard_42.bin",
808
+ "format": "raw-shard",
809
+ "nbytes": 117440512,
810
+ "records": [
811
+ {
812
+ "name": "model.layers.3.mlp.down_proj.weight",
813
+ "shape": [
814
+ 4096,
815
+ 14336
816
+ ],
817
+ "dtype": "float16",
818
+ "format": "f32-to-bf16",
819
+ "nbytes": 117440512,
820
+ "byteOffset": 0
821
+ }
822
+ ],
823
+ "md5sum": "8ba44f0035b36f8400d8d58e2b1048cf"
824
+ },
825
+ {
826
+ "dataPath": "params_shard_43.bin",
827
+ "format": "raw-shard",
828
+ "nbytes": 234881024,
829
+ "records": [
830
+ {
831
+ "name": "model.layers.3.mlp.gate_up_proj.weight",
832
+ "shape": [
833
+ 28672,
834
+ 4096
835
+ ],
836
+ "dtype": "float16",
837
+ "format": "f32-to-bf16",
838
+ "nbytes": 234881024,
839
+ "byteOffset": 0
840
+ }
841
+ ],
842
+ "md5sum": "59d3f67b229b77d3c7d636fa81956d13"
843
+ },
844
+ {
845
+ "dataPath": "params_shard_44.bin",
846
+ "format": "raw-shard",
847
+ "nbytes": 50331648,
848
+ "records": [
849
+ {
850
+ "name": "model.layers.3.self_attn.qkv_proj.weight",
851
+ "shape": [
852
+ 6144,
853
+ 4096
854
+ ],
855
+ "dtype": "float16",
856
+ "format": "f32-to-bf16",
857
+ "nbytes": 50331648,
858
+ "byteOffset": 0
859
+ }
860
+ ],
861
+ "md5sum": "2b60c35074e68fe4c2c9d2fb39411bcf"
862
+ },
863
+ {
864
+ "dataPath": "params_shard_45.bin",
865
+ "format": "raw-shard",
866
+ "nbytes": 33554432,
867
+ "records": [
868
+ {
869
+ "name": "model.layers.3.self_attn.o_proj.weight",
870
+ "shape": [
871
+ 4096,
872
+ 4096
873
+ ],
874
+ "dtype": "float16",
875
+ "format": "f32-to-bf16",
876
+ "nbytes": 33554432,
877
+ "byteOffset": 0
878
+ }
879
+ ],
880
+ "md5sum": "fd0947763bd4a6c920df8f115d281b59"
881
+ },
882
+ {
883
+ "dataPath": "params_shard_46.bin",
884
+ "format": "raw-shard",
885
+ "nbytes": 117440512,
886
+ "records": [
887
+ {
888
+ "name": "model.layers.4.mlp.down_proj.weight",
889
+ "shape": [
890
+ 4096,
891
+ 14336
892
+ ],
893
+ "dtype": "float16",
894
+ "format": "f32-to-bf16",
895
+ "nbytes": 117440512,
896
+ "byteOffset": 0
897
+ }
898
+ ],
899
+ "md5sum": "c0515ff9e26c249190d472180cfd75fa"
900
+ },
901
+ {
902
+ "dataPath": "params_shard_47.bin",
903
+ "format": "raw-shard",
904
+ "nbytes": 234881024,
905
+ "records": [
906
+ {
907
+ "name": "model.layers.4.mlp.gate_up_proj.weight",
908
+ "shape": [
909
+ 28672,
910
+ 4096
911
+ ],
912
+ "dtype": "float16",
913
+ "format": "f32-to-bf16",
914
+ "nbytes": 234881024,
915
+ "byteOffset": 0
916
+ }
917
+ ],
918
+ "md5sum": "74951cccccb4365035c8e64db27a0153"
919
+ },
920
+ {
921
+ "dataPath": "params_shard_48.bin",
922
+ "format": "raw-shard",
923
+ "nbytes": 50331648,
924
+ "records": [
925
+ {
926
+ "name": "model.layers.4.self_attn.qkv_proj.weight",
927
+ "shape": [
928
+ 6144,
929
+ 4096
930
+ ],
931
+ "dtype": "float16",
932
+ "format": "f32-to-bf16",
933
+ "nbytes": 50331648,
934
+ "byteOffset": 0
935
+ }
936
+ ],
937
+ "md5sum": "f5ac4ef990bdadbaa941f1d3fa38a0b9"
938
+ },
939
+ {
940
+ "dataPath": "params_shard_49.bin",
941
+ "format": "raw-shard",
942
+ "nbytes": 33554432,
943
+ "records": [
944
+ {
945
+ "name": "model.layers.4.self_attn.o_proj.weight",
946
+ "shape": [
947
+ 4096,
948
+ 4096
949
+ ],
950
+ "dtype": "float16",
951
+ "format": "f32-to-bf16",
952
+ "nbytes": 33554432,
953
+ "byteOffset": 0
954
+ }
955
+ ],
956
+ "md5sum": "9036a0a1d92841dd1720590232ff092b"
957
+ },
958
+ {
959
+ "dataPath": "params_shard_50.bin",
960
+ "format": "raw-shard",
961
+ "nbytes": 117440512,
962
+ "records": [
963
+ {
964
+ "name": "model.layers.5.mlp.down_proj.weight",
965
+ "shape": [
966
+ 4096,
967
+ 14336
968
+ ],
969
+ "dtype": "float16",
970
+ "format": "f32-to-bf16",
971
+ "nbytes": 117440512,
972
+ "byteOffset": 0
973
+ }
974
+ ],
975
+ "md5sum": "bbab290a1a40b518721a5c287f313e22"
976
+ },
977
+ {
978
+ "dataPath": "params_shard_51.bin",
979
+ "format": "raw-shard",
980
+ "nbytes": 234881024,
981
+ "records": [
982
+ {
983
+ "name": "model.layers.5.mlp.gate_up_proj.weight",
984
+ "shape": [
985
+ 28672,
986
+ 4096
987
+ ],
988
+ "dtype": "float16",
989
+ "format": "f32-to-bf16",
990
+ "nbytes": 234881024,
991
+ "byteOffset": 0
992
+ }
993
+ ],
994
+ "md5sum": "fdb9cf70c17fec072633c03e4cf0b63e"
995
+ },
996
+ {
997
+ "dataPath": "params_shard_52.bin",
998
+ "format": "raw-shard",
999
+ "nbytes": 50331648,
1000
+ "records": [
1001
+ {
1002
+ "name": "model.layers.5.self_attn.qkv_proj.weight",
1003
+ "shape": [
1004
+ 6144,
1005
+ 4096
1006
+ ],
1007
+ "dtype": "float16",
1008
+ "format": "f32-to-bf16",
1009
+ "nbytes": 50331648,
1010
+ "byteOffset": 0
1011
+ }
1012
+ ],
1013
+ "md5sum": "9693c2aa897188396cd3eab7040d51ce"
1014
+ },
1015
+ {
1016
+ "dataPath": "params_shard_53.bin",
1017
+ "format": "raw-shard",
1018
+ "nbytes": 33554432,
1019
+ "records": [
1020
+ {
1021
+ "name": "model.layers.5.self_attn.o_proj.weight",
1022
+ "shape": [
1023
+ 4096,
1024
+ 4096
1025
+ ],
1026
+ "dtype": "float16",
1027
+ "format": "f32-to-bf16",
1028
+ "nbytes": 33554432,
1029
+ "byteOffset": 0
1030
+ }
1031
+ ],
1032
+ "md5sum": "8334828cca96b690fbc2c612ec5d06ea"
1033
+ },
1034
+ {
1035
+ "dataPath": "params_shard_54.bin",
1036
+ "format": "raw-shard",
1037
+ "nbytes": 117440512,
1038
+ "records": [
1039
+ {
1040
+ "name": "model.layers.6.mlp.down_proj.weight",
1041
+ "shape": [
1042
+ 4096,
1043
+ 14336
1044
+ ],
1045
+ "dtype": "float16",
1046
+ "format": "f32-to-bf16",
1047
+ "nbytes": 117440512,
1048
+ "byteOffset": 0
1049
+ }
1050
+ ],
1051
+ "md5sum": "783b2d84e42a6402397001749439c350"
1052
+ },
1053
+ {
1054
+ "dataPath": "params_shard_55.bin",
1055
+ "format": "raw-shard",
1056
+ "nbytes": 234881024,
1057
+ "records": [
1058
+ {
1059
+ "name": "model.layers.6.mlp.gate_up_proj.weight",
1060
+ "shape": [
1061
+ 28672,
1062
+ 4096
1063
+ ],
1064
+ "dtype": "float16",
1065
+ "format": "f32-to-bf16",
1066
+ "nbytes": 234881024,
1067
+ "byteOffset": 0
1068
+ }
1069
+ ],
1070
+ "md5sum": "68c0820b730f9b61fe0390ec719c4e45"
1071
+ },
1072
+ {
1073
+ "dataPath": "params_shard_56.bin",
1074
+ "format": "raw-shard",
1075
+ "nbytes": 50331648,
1076
+ "records": [
1077
+ {
1078
+ "name": "model.layers.6.self_attn.qkv_proj.weight",
1079
+ "shape": [
1080
+ 6144,
1081
+ 4096
1082
+ ],
1083
+ "dtype": "float16",
1084
+ "format": "f32-to-bf16",
1085
+ "nbytes": 50331648,
1086
+ "byteOffset": 0
1087
+ }
1088
+ ],
1089
+ "md5sum": "4a39640abc08329a9b19c2a847bc461a"
1090
+ },
1091
+ {
1092
+ "dataPath": "params_shard_57.bin",
1093
+ "format": "raw-shard",
1094
+ "nbytes": 33554432,
1095
+ "records": [
1096
+ {
1097
+ "name": "model.layers.6.self_attn.o_proj.weight",
1098
+ "shape": [
1099
+ 4096,
1100
+ 4096
1101
+ ],
1102
+ "dtype": "float16",
1103
+ "format": "f32-to-bf16",
1104
+ "nbytes": 33554432,
1105
+ "byteOffset": 0
1106
+ }
1107
+ ],
1108
+ "md5sum": "2636ab38f2bdf0693f8672e9b23f5927"
1109
+ },
1110
+ {
1111
+ "dataPath": "params_shard_58.bin",
1112
+ "format": "raw-shard",
1113
+ "nbytes": 117440512,
1114
+ "records": [
1115
+ {
1116
+ "name": "model.layers.7.mlp.down_proj.weight",
1117
+ "shape": [
1118
+ 4096,
1119
+ 14336
1120
+ ],
1121
+ "dtype": "float16",
1122
+ "format": "f32-to-bf16",
1123
+ "nbytes": 117440512,
1124
+ "byteOffset": 0
1125
+ }
1126
+ ],
1127
+ "md5sum": "31d16ba9a1b8044311b6da2fb1f802e3"
1128
+ },
1129
+ {
1130
+ "dataPath": "params_shard_59.bin",
1131
+ "format": "raw-shard",
1132
+ "nbytes": 234881024,
1133
+ "records": [
1134
+ {
1135
+ "name": "model.layers.7.mlp.gate_up_proj.weight",
1136
+ "shape": [
1137
+ 28672,
1138
+ 4096
1139
+ ],
1140
+ "dtype": "float16",
1141
+ "format": "f32-to-bf16",
1142
+ "nbytes": 234881024,
1143
+ "byteOffset": 0
1144
+ }
1145
+ ],
1146
+ "md5sum": "04a2c934711cd4ca9c06256add28c53d"
1147
+ },
1148
+ {
1149
+ "dataPath": "params_shard_60.bin",
1150
+ "format": "raw-shard",
1151
+ "nbytes": 50331648,
1152
+ "records": [
1153
+ {
1154
+ "name": "model.layers.7.self_attn.qkv_proj.weight",
1155
+ "shape": [
1156
+ 6144,
1157
+ 4096
1158
+ ],
1159
+ "dtype": "float16",
1160
+ "format": "f32-to-bf16",
1161
+ "nbytes": 50331648,
1162
+ "byteOffset": 0
1163
+ }
1164
+ ],
1165
+ "md5sum": "29c78d4b12bf9dd955a8e474b0a95560"
1166
+ },
1167
+ {
1168
+ "dataPath": "params_shard_61.bin",
1169
+ "format": "raw-shard",
1170
+ "nbytes": 33554432,
1171
+ "records": [
1172
+ {
1173
+ "name": "model.layers.7.self_attn.o_proj.weight",
1174
+ "shape": [
1175
+ 4096,
1176
+ 4096
1177
+ ],
1178
+ "dtype": "float16",
1179
+ "format": "f32-to-bf16",
1180
+ "nbytes": 33554432,
1181
+ "byteOffset": 0
1182
+ }
1183
+ ],
1184
+ "md5sum": "719fb7aae11d67061f7e658fc85d588f"
1185
+ },
1186
+ {
1187
+ "dataPath": "params_shard_62.bin",
1188
+ "format": "raw-shard",
1189
+ "nbytes": 117440512,
1190
+ "records": [
1191
+ {
1192
+ "name": "model.layers.8.mlp.down_proj.weight",
1193
+ "shape": [
1194
+ 4096,
1195
+ 14336
1196
+ ],
1197
+ "dtype": "float16",
1198
+ "format": "f32-to-bf16",
1199
+ "nbytes": 117440512,
1200
+ "byteOffset": 0
1201
+ }
1202
+ ],
1203
+ "md5sum": "741a1aaadd2800d2d7c8d53dbc419704"
1204
+ },
1205
+ {
1206
+ "dataPath": "params_shard_63.bin",
1207
+ "format": "raw-shard",
1208
+ "nbytes": 234881024,
1209
+ "records": [
1210
+ {
1211
+ "name": "model.layers.8.mlp.gate_up_proj.weight",
1212
+ "shape": [
1213
+ 28672,
1214
+ 4096
1215
+ ],
1216
+ "dtype": "float16",
1217
+ "format": "f32-to-bf16",
1218
+ "nbytes": 234881024,
1219
+ "byteOffset": 0
1220
+ }
1221
+ ],
1222
+ "md5sum": "6a2bb53b5592bc8a752e2a53d2ebb556"
1223
+ },
1224
+ {
1225
+ "dataPath": "params_shard_64.bin",
1226
+ "format": "raw-shard",
1227
+ "nbytes": 50331648,
1228
+ "records": [
1229
+ {
1230
+ "name": "model.layers.8.self_attn.qkv_proj.weight",
1231
+ "shape": [
1232
+ 6144,
1233
+ 4096
1234
+ ],
1235
+ "dtype": "float16",
1236
+ "format": "f32-to-bf16",
1237
+ "nbytes": 50331648,
1238
+ "byteOffset": 0
1239
+ }
1240
+ ],
1241
+ "md5sum": "69d787685687a5273cb624f8062a1607"
1242
+ },
1243
+ {
1244
+ "dataPath": "params_shard_65.bin",
1245
+ "format": "raw-shard",
1246
+ "nbytes": 33554432,
1247
+ "records": [
1248
+ {
1249
+ "name": "model.layers.8.self_attn.o_proj.weight",
1250
+ "shape": [
1251
+ 4096,
1252
+ 4096
1253
+ ],
1254
+ "dtype": "float16",
1255
+ "format": "f32-to-bf16",
1256
+ "nbytes": 33554432,
1257
+ "byteOffset": 0
1258
+ }
1259
+ ],
1260
+ "md5sum": "63e433d6f4cec793a14223c706b9d3d9"
1261
+ },
1262
+ {
1263
+ "dataPath": "params_shard_66.bin",
1264
+ "format": "raw-shard",
1265
+ "nbytes": 270336,
1266
+ "records": [
1267
+ {
1268
+ "name": "model.layers.10.input_layernorm.weight",
1269
+ "shape": [
1270
+ 4096
1271
+ ],
1272
+ "dtype": "float16",
1273
+ "format": "f32-to-bf16",
1274
+ "nbytes": 8192,
1275
+ "byteOffset": 0
1276
+ },
1277
+ {
1278
+ "name": "model.layers.10.post_attention_layernorm.weight",
1279
+ "shape": [
1280
+ 4096
1281
+ ],
1282
+ "dtype": "float16",
1283
+ "format": "f32-to-bf16",
1284
+ "nbytes": 8192,
1285
+ "byteOffset": 8192
1286
+ },
1287
+ {
1288
+ "name": "model.layers.11.input_layernorm.weight",
1289
+ "shape": [
1290
+ 4096
1291
+ ],
1292
+ "dtype": "float16",
1293
+ "format": "f32-to-bf16",
1294
+ "nbytes": 8192,
1295
+ "byteOffset": 16384
1296
+ },
1297
+ {
1298
+ "name": "model.layers.11.post_attention_layernorm.weight",
1299
+ "shape": [
1300
+ 4096
1301
+ ],
1302
+ "dtype": "float16",
1303
+ "format": "f32-to-bf16",
1304
+ "nbytes": 8192,
1305
+ "byteOffset": 24576
1306
+ },
1307
+ {
1308
+ "name": "model.layers.12.input_layernorm.weight",
1309
+ "shape": [
1310
+ 4096
1311
+ ],
1312
+ "dtype": "float16",
1313
+ "format": "f32-to-bf16",
1314
+ "nbytes": 8192,
1315
+ "byteOffset": 32768
1316
+ },
1317
+ {
1318
+ "name": "model.layers.12.post_attention_layernorm.weight",
1319
+ "shape": [
1320
+ 4096
1321
+ ],
1322
+ "dtype": "float16",
1323
+ "format": "f32-to-bf16",
1324
+ "nbytes": 8192,
1325
+ "byteOffset": 40960
1326
+ },
1327
+ {
1328
+ "name": "model.layers.13.input_layernorm.weight",
1329
+ "shape": [
1330
+ 4096
1331
+ ],
1332
+ "dtype": "float16",
1333
+ "format": "f32-to-bf16",
1334
+ "nbytes": 8192,
1335
+ "byteOffset": 49152
1336
+ },
1337
+ {
1338
+ "name": "model.layers.13.post_attention_layernorm.weight",
1339
+ "shape": [
1340
+ 4096
1341
+ ],
1342
+ "dtype": "float16",
1343
+ "format": "f32-to-bf16",
1344
+ "nbytes": 8192,
1345
+ "byteOffset": 57344
1346
+ },
1347
+ {
1348
+ "name": "model.layers.14.input_layernorm.weight",
1349
+ "shape": [
1350
+ 4096
1351
+ ],
1352
+ "dtype": "float16",
1353
+ "format": "f32-to-bf16",
1354
+ "nbytes": 8192,
1355
+ "byteOffset": 65536
1356
+ },
1357
+ {
1358
+ "name": "model.layers.14.post_attention_layernorm.weight",
1359
+ "shape": [
1360
+ 4096
1361
+ ],
1362
+ "dtype": "float16",
1363
+ "format": "f32-to-bf16",
1364
+ "nbytes": 8192,
1365
+ "byteOffset": 73728
1366
+ },
1367
+ {
1368
+ "name": "model.layers.15.input_layernorm.weight",
1369
+ "shape": [
1370
+ 4096
1371
+ ],
1372
+ "dtype": "float16",
1373
+ "format": "f32-to-bf16",
1374
+ "nbytes": 8192,
1375
+ "byteOffset": 81920
1376
+ },
1377
+ {
1378
+ "name": "model.layers.15.post_attention_layernorm.weight",
1379
+ "shape": [
1380
+ 4096
1381
+ ],
1382
+ "dtype": "float16",
1383
+ "format": "f32-to-bf16",
1384
+ "nbytes": 8192,
1385
+ "byteOffset": 90112
1386
+ },
1387
+ {
1388
+ "name": "model.layers.9.input_layernorm.weight",
1389
+ "shape": [
1390
+ 4096
1391
+ ],
1392
+ "dtype": "float16",
1393
+ "format": "f32-to-bf16",
1394
+ "nbytes": 8192,
1395
+ "byteOffset": 98304
1396
+ },
1397
+ {
1398
+ "name": "model.layers.9.post_attention_layernorm.weight",
1399
+ "shape": [
1400
+ 4096
1401
+ ],
1402
+ "dtype": "float16",
1403
+ "format": "f32-to-bf16",
1404
+ "nbytes": 8192,
1405
+ "byteOffset": 106496
1406
+ },
1407
+ {
1408
+ "name": "model.norm.weight",
1409
+ "shape": [
1410
+ 4096
1411
+ ],
1412
+ "dtype": "float16",
1413
+ "format": "f32-to-bf16",
1414
+ "nbytes": 8192,
1415
+ "byteOffset": 114688
1416
+ },
1417
+ {
1418
+ "name": "model.layers.0.input_layernorm.weight",
1419
+ "shape": [
1420
+ 4096
1421
+ ],
1422
+ "dtype": "float16",
1423
+ "format": "f32-to-bf16",
1424
+ "nbytes": 8192,
1425
+ "byteOffset": 122880
1426
+ },
1427
+ {
1428
+ "name": "model.layers.0.post_attention_layernorm.weight",
1429
+ "shape": [
1430
+ 4096
1431
+ ],
1432
+ "dtype": "float16",
1433
+ "format": "f32-to-bf16",
1434
+ "nbytes": 8192,
1435
+ "byteOffset": 131072
1436
+ },
1437
+ {
1438
+ "name": "model.layers.1.input_layernorm.weight",
1439
+ "shape": [
1440
+ 4096
1441
+ ],
1442
+ "dtype": "float16",
1443
+ "format": "f32-to-bf16",
1444
+ "nbytes": 8192,
1445
+ "byteOffset": 139264
1446
+ },
1447
+ {
1448
+ "name": "model.layers.1.post_attention_layernorm.weight",
1449
+ "shape": [
1450
+ 4096
1451
+ ],
1452
+ "dtype": "float16",
1453
+ "format": "f32-to-bf16",
1454
+ "nbytes": 8192,
1455
+ "byteOffset": 147456
1456
+ },
1457
+ {
1458
+ "name": "model.layers.2.input_layernorm.weight",
1459
+ "shape": [
1460
+ 4096
1461
+ ],
1462
+ "dtype": "float16",
1463
+ "format": "f32-to-bf16",
1464
+ "nbytes": 8192,
1465
+ "byteOffset": 155648
1466
+ },
1467
+ {
1468
+ "name": "model.layers.2.post_attention_layernorm.weight",
1469
+ "shape": [
1470
+ 4096
1471
+ ],
1472
+ "dtype": "float16",
1473
+ "format": "f32-to-bf16",
1474
+ "nbytes": 8192,
1475
+ "byteOffset": 163840
1476
+ },
1477
+ {
1478
+ "name": "model.layers.3.input_layernorm.weight",
1479
+ "shape": [
1480
+ 4096
1481
+ ],
1482
+ "dtype": "float16",
1483
+ "format": "f32-to-bf16",
1484
+ "nbytes": 8192,
1485
+ "byteOffset": 172032
1486
+ },
1487
+ {
1488
+ "name": "model.layers.3.post_attention_layernorm.weight",
1489
+ "shape": [
1490
+ 4096
1491
+ ],
1492
+ "dtype": "float16",
1493
+ "format": "f32-to-bf16",
1494
+ "nbytes": 8192,
1495
+ "byteOffset": 180224
1496
+ },
1497
+ {
1498
+ "name": "model.layers.4.input_layernorm.weight",
1499
+ "shape": [
1500
+ 4096
1501
+ ],
1502
+ "dtype": "float16",
1503
+ "format": "f32-to-bf16",
1504
+ "nbytes": 8192,
1505
+ "byteOffset": 188416
1506
+ },
1507
+ {
1508
+ "name": "model.layers.4.post_attention_layernorm.weight",
1509
+ "shape": [
1510
+ 4096
1511
+ ],
1512
+ "dtype": "float16",
1513
+ "format": "f32-to-bf16",
1514
+ "nbytes": 8192,
1515
+ "byteOffset": 196608
1516
+ },
1517
+ {
1518
+ "name": "model.layers.5.input_layernorm.weight",
1519
+ "shape": [
1520
+ 4096
1521
+ ],
1522
+ "dtype": "float16",
1523
+ "format": "f32-to-bf16",
1524
+ "nbytes": 8192,
1525
+ "byteOffset": 204800
1526
+ },
1527
+ {
1528
+ "name": "model.layers.5.post_attention_layernorm.weight",
1529
+ "shape": [
1530
+ 4096
1531
+ ],
1532
+ "dtype": "float16",
1533
+ "format": "f32-to-bf16",
1534
+ "nbytes": 8192,
1535
+ "byteOffset": 212992
1536
+ },
1537
+ {
1538
+ "name": "model.layers.6.input_layernorm.weight",
1539
+ "shape": [
1540
+ 4096
1541
+ ],
1542
+ "dtype": "float16",
1543
+ "format": "f32-to-bf16",
1544
+ "nbytes": 8192,
1545
+ "byteOffset": 221184
1546
+ },
1547
+ {
1548
+ "name": "model.layers.6.post_attention_layernorm.weight",
1549
+ "shape": [
1550
+ 4096
1551
+ ],
1552
+ "dtype": "float16",
1553
+ "format": "f32-to-bf16",
1554
+ "nbytes": 8192,
1555
+ "byteOffset": 229376
1556
+ },
1557
+ {
1558
+ "name": "model.layers.7.input_layernorm.weight",
1559
+ "shape": [
1560
+ 4096
1561
+ ],
1562
+ "dtype": "float16",
1563
+ "format": "f32-to-bf16",
1564
+ "nbytes": 8192,
1565
+ "byteOffset": 237568
1566
+ },
1567
+ {
1568
+ "name": "model.layers.7.post_attention_layernorm.weight",
1569
+ "shape": [
1570
+ 4096
1571
+ ],
1572
+ "dtype": "float16",
1573
+ "format": "f32-to-bf16",
1574
+ "nbytes": 8192,
1575
+ "byteOffset": 245760
1576
+ },
1577
+ {
1578
+ "name": "model.layers.8.input_layernorm.weight",
1579
+ "shape": [
1580
+ 4096
1581
+ ],
1582
+ "dtype": "float16",
1583
+ "format": "f32-to-bf16",
1584
+ "nbytes": 8192,
1585
+ "byteOffset": 253952
1586
+ },
1587
+ {
1588
+ "name": "model.layers.8.post_attention_layernorm.weight",
1589
+ "shape": [
1590
+ 4096
1591
+ ],
1592
+ "dtype": "float16",
1593
+ "format": "f32-to-bf16",
1594
+ "nbytes": 8192,
1595
+ "byteOffset": 262144
1596
+ }
1597
+ ],
1598
+ "md5sum": "02d3d0ecea966763230cf838b3702024"
1599
+ }
1600
+ ]
1601
+ }
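ndarray-cache.json is the weight manifest for the shards that follow: 99 parameter tensors totalling about 9.08 GB at 16 bits per parameter, packed into params_shard_*.bin files, with each record carrying the tensor name, shape, dtype, byte count, byte offset, and a per-shard md5sum. A verification sketch, stdlib only, assuming the shards have been downloaded next to the manifest and that each md5sum covers the whole shard file (which the per-shard placement of the field suggests):

import hashlib
import json
import os

model_dir = "."  # placeholder -- wherever the repository was downloaded

with open(os.path.join(model_dir, "ndarray-cache.json")) as f:
    cache = json.load(f)

record_bytes = 0
for shard in cache["records"]:
    with open(os.path.join(model_dir, shard["dataPath"]), "rb") as fh:
        blob = fh.read()
    assert len(blob) == shard["nbytes"], shard["dataPath"] + ": size mismatch"
    assert hashlib.md5(blob).hexdigest() == shard["md5sum"], shard["dataPath"] + ": md5 mismatch"
    record_bytes += sum(r["nbytes"] for r in shard["records"])

assert record_bytes == cache["metadata"]["ParamBytes"]
print("verified", len(cache["records"]), "shards,", record_bytes, "parameter bytes")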
params_shard_0.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f5c34e0190bfa545f8ac33973b21c81286608102aa74d6ebf601adaf9b83cd40
+ size 1050673152
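Each params_shard_*.bin entry in this diff is a Git LFS pointer (version line, sha256 oid, size in bytes) rather than the weights themselves; the binaries live in LFS storage and are resolved on clone or download. A small stdlib sketch of what the pointer encodes, with placeholder file names, checking a downloaded shard against its pointer:

import hashlib

def parse_lfs_pointer(text: str) -> dict:
    # Pointer files are "key value" lines: version, oid sha256:<hex>, size <bytes>.
    fields = dict(line.split(" ", 1) for line in text.strip().splitlines())
    return {"sha256": fields["oid"].split(":", 1)[1], "size": int(fields["size"])}

pointer = parse_lfs_pointer(open("params_shard_0.bin.pointer").read())  # placeholder name
blob = open("params_shard_0.bin", "rb").read()                          # the resolved ~1 GB shard

assert len(blob) == pointer["size"]                           # 1050673152 bytes for shard 0
assert hashlib.sha256(blob).hexdigest() == pointer["sha256"]
print("params_shard_0.bin matches its LFS pointer")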
params_shard_1.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:445cab3f332e1708e7c15668d8f351db02b26aa02f00d212401a8aa56face535
+ size 117440512
params_shard_10.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:02b486042734a906a87aa531b99dfc629c16477c7fdd4f9d68000b592d07aa36
+ size 234881024
params_shard_11.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:67ebcd09c7b61eb820640ab6af0ed3bf16e167986d2a799f197a3d745cd0a3b5
+ size 50331648
params_shard_12.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:173e92e34f69354b20833e9f5661f45e72654d5234b2f30956e8b4421413743d
+ size 33554432
params_shard_13.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:8ad1aae3d05baeb647b5e64c146fd9d4e86c6701be0c49e8f2ac2ea3a69536fa
+ size 117440512
params_shard_14.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:ac056496d030336ad476904bfb7ea7320f2f84eb3666fa51b27d1457213bb66c
+ size 234881024
params_shard_15.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:53f2c24c5450df42b9c488af95930b8c5365c58295ce4d8e0e33cb135b29dac6
+ size 50331648
params_shard_16.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6de30c9ed4e5d7823be87d44ba193c95b7f37fa8140f5b0f25ff0e920e1289d9
+ size 33554432
params_shard_17.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:daa3de4f01ab455a779a3eac359dcd16166169013ce9fe497c7b15d0eedb7f6c
+ size 117440512
params_shard_18.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4f7628c2b33255546b4c1df9ecb01f2a18b90001971262ec649c86e900b3f4f1
+ size 234881024
params_shard_19.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:37c1c1652641aeb131b3dc9b200d5ecefff8e868292f60c16fb9149a43ae1f5a
+ size 50331648
params_shard_2.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:263be2f595582e9047bcb363ca3288f6626c84e8c1561b7a717b6b83267be79d
+ size 234881024
params_shard_20.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3926e35b1a084e4c6f9991c76f0bf34f070f9e4e580a7cbc77c468569bfd9fc8
+ size 33554432
params_shard_21.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:23f9952049e1618886763bf291e9ea0dcbc1f464c27b8caef1de06b1d0f91014
+ size 117440512
params_shard_22.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3bf6bd86779af2142944c5db9787ae4f942b2abef4d25ca3485e50a5828a3453
+ size 234881024
params_shard_23.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:fdb2a2dde30c79b351acd8b6ef3ececf06281f3bdcb7deb89cdcdc92fe50ccf7
+ size 50331648
params_shard_24.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6eb40d97a13d68539da132be293e880ac9065a12a6f640939eec6a4550cbf614
+ size 33554432
params_shard_25.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a9869ee55e4e177fd8ec5a8841185ab837d386d24eaefc6107a3e8e7b1a97e6b
+ size 117440512
params_shard_26.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:7d08cc5469cada4c2f648e17bad42162361d0739f3013f1707c3620869b3e340
+ size 234881024
params_shard_27.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:999613bb09661471df8048f65c1ea4d66a39a0c83e7039730c22457dfd6faf89
+ size 50331648
params_shard_28.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:bc85df02827c9cca6e0487defed0974588be8ab8bf238b6254d15f6c21546110
+ size 33554432
params_shard_29.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5079262126a8af3a99ac8b4ec02a121fa0bc6f62f803ecd7396aa2a17d8a7095
+ size 1050673152
params_shard_3.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9931c5a8202aeaa7c41d43d9d58ae1ef2149e4072deac7070f93b83316db8198
+ size 50331648
params_shard_30.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a47ca400848bde91815d4a74dcd47dccacbec7a33ac9e4538b1443424a91da4a
+ size 117440512
params_shard_31.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:9c909118557e35b11d218470acc5d2ec09f9d41ffd86278c2f2b8a9817c9033a
+ size 234881024
params_shard_32.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:f150331838a158d03bc8270726f4633ee1bc7f9f2a4ae1bd92060a8482f6d5bf
+ size 50331648
params_shard_33.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d04f7f419c4906a1a5531ef03294a35a3bf5b8b2539404672fa01025621283ca
+ size 33554432
params_shard_34.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:e696b5d390e8c13b8500d892bf048d2ffb398e61ae9b770d4e1e4b74947dc487
+ size 117440512
params_shard_35.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:07b89e2c8287fbd64ae3e71f57fb2401ef0e24dda084db6ac3ad7bd6e79f560f
+ size 234881024
params_shard_36.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:d227d81a8a9d3f032031127da39f9a714ca3732661df6c787f032931197e54f1
+ size 50331648
params_shard_37.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6d81c41260245c18f8ba541e846905cb75c5a418078ca4199636fed80c64b302
+ size 33554432
params_shard_38.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:438f9e025a5d58b9c3625efd5567717722f80eef56e90a5dbf1b8e69138aedd5
+ size 117440512
params_shard_39.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:2066071808b920d6fdf31c5031c9b3fa2d69dcaed7e789cb028e555f947f2ed9
+ size 234881024
params_shard_4.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:cd069648bd58b8c2194c16775a6d4c8c239f5e6485b74c343f486a33ca5d48eb
+ size 33554432
params_shard_40.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:693a5ce806cf32c16a2da21385068a08756e127773bbf8b97de064b1aa43f183
+ size 50331648
params_shard_41.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:591a6c8b26b5a69d0db028b53a3987e97dcb7d3e6a7e96d931f62452d4756388
+ size 33554432
params_shard_42.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3ec9605a7605af40ff58c8818ef421f6d4902ff1c69d40e66709f20e6ea5f5ba
+ size 117440512
params_shard_43.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:86af62dd11f1cdc4bd8b1def09c5f438dd82310277a9ba6ba606bd6997c87a44
+ size 234881024
params_shard_44.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:3b8e010c5c1b064972df0a4953e5dc767ebb587f45739b70cb4423bbd2963073
+ size 50331648
params_shard_45.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:74fb4ec2845b42970906b0d2f7e3b20d11427027969b4ef408529274865f3741
+ size 33554432
params_shard_46.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:a8d100dc4c70d520d05162d9fe950612efa9af5e33bc1dd644f0a5ffd263767f
+ size 117440512
params_shard_47.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:45bc611a052af130d8af8f5065c1eef1d58ac8f16e8ab465699ef8fff191d16b
+ size 234881024
params_shard_48.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:5279789df8f896561bac02bb8693a9621596fbced8f70f97173d09898ed365bb
+ size 50331648
params_shard_49.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:6e2216e46e89b63b479b7705a2c9d5d9d70e1035a47a70822dbda4d74ebb7330
+ size 33554432
params_shard_5.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:038ae274d307d9760eb0bc8e3edc6421db057085579ec75aba0e2291f4697df2
+ size 117440512
params_shard_50.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:167742d288fb594794b28d5d6ba92d26c3a2b514c1f9118ad08d80d7519fe2ed
+ size 117440512
params_shard_51.bin ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4cc0fe470439cf00fed798a4f425fc1bbac081264fce2feb2df697f7b96557cb
+ size 234881024