Upload MixtralForCausalLM
Browse files- config.json +51 -2
- model-00001-of-00015.safetensors +2 -2
- model-00002-of-00015.safetensors +2 -2
- model-00003-of-00015.safetensors +2 -2
- model-00004-of-00015.safetensors +2 -2
- model-00005-of-00015.safetensors +2 -2
- model-00006-of-00015.safetensors +2 -2
- model-00007-of-00015.safetensors +2 -2
- model-00008-of-00015.safetensors +2 -2
- model-00009-of-00015.safetensors +2 -2
- model-00010-of-00015.safetensors +2 -2
- model-00011-of-00015.safetensors +2 -2
- model-00012-of-00015.safetensors +2 -2
- model-00013-of-00015.safetensors +2 -2
- model-00014-of-00015.safetensors +2 -2
- model-00015-of-00015.safetensors +2 -2
- model.safetensors.index.json +57 -233
config.json
CHANGED
@@ -24,7 +24,56 @@
|
|
24 |
"dataset": "wikitext2",
|
25 |
"desc_act": false,
|
26 |
"group_size": 128,
|
27 |
-
"modules_in_block_to_quantize":
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
28 |
"quant_method": "gptq",
|
29 |
"sym": true,
|
30 |
"true_sequential": true
|
@@ -36,6 +85,6 @@
|
|
36 |
"tie_word_embeddings": false,
|
37 |
"torch_dtype": "float16",
|
38 |
"transformers_version": "4.39.3",
|
39 |
-
"use_cache":
|
40 |
"vocab_size": 32768
|
41 |
}
|
|
|
24 |
"dataset": "wikitext2",
|
25 |
"desc_act": false,
|
26 |
"group_size": 128,
|
27 |
+
"modules_in_block_to_quantize": [
|
28 |
+
[
|
29 |
+
"self_attn.k_proj",
|
30 |
+
"self_attn.v_proj",
|
31 |
+
"self_attn.q_proj"
|
32 |
+
],
|
33 |
+
[
|
34 |
+
"self_attn.o_proj"
|
35 |
+
],
|
36 |
+
[
|
37 |
+
"block_sparse_moe.experts.0.w1",
|
38 |
+
"block_sparse_moe.experts.0.w2",
|
39 |
+
"block_sparse_moe.experts.0.w3"
|
40 |
+
],
|
41 |
+
[
|
42 |
+
"block_sparse_moe.experts.1.w1",
|
43 |
+
"block_sparse_moe.experts.1.w2",
|
44 |
+
"block_sparse_moe.experts.1.w3"
|
45 |
+
],
|
46 |
+
[
|
47 |
+
"block_sparse_moe.experts.2.w1",
|
48 |
+
"block_sparse_moe.experts.2.w2",
|
49 |
+
"block_sparse_moe.experts.2.w3"
|
50 |
+
],
|
51 |
+
[
|
52 |
+
"block_sparse_moe.experts.3.w1",
|
53 |
+
"block_sparse_moe.experts.3.w2",
|
54 |
+
"block_sparse_moe.experts.3.w3"
|
55 |
+
],
|
56 |
+
[
|
57 |
+
"block_sparse_moe.experts.4.w1",
|
58 |
+
"block_sparse_moe.experts.4.w2",
|
59 |
+
"block_sparse_moe.experts.4.w3"
|
60 |
+
],
|
61 |
+
[
|
62 |
+
"block_sparse_moe.experts.5.w1",
|
63 |
+
"block_sparse_moe.experts.5.w2",
|
64 |
+
"block_sparse_moe.experts.5.w3"
|
65 |
+
],
|
66 |
+
[
|
67 |
+
"block_sparse_moe.experts.6.w1",
|
68 |
+
"block_sparse_moe.experts.6.w2",
|
69 |
+
"block_sparse_moe.experts.6.w3"
|
70 |
+
],
|
71 |
+
[
|
72 |
+
"block_sparse_moe.experts.7.w1",
|
73 |
+
"block_sparse_moe.experts.7.w2",
|
74 |
+
"block_sparse_moe.experts.7.w3"
|
75 |
+
]
|
76 |
+
],
|
77 |
"quant_method": "gptq",
|
78 |
"sym": true,
|
79 |
"true_sequential": true
|
|
|
85 |
"tie_word_embeddings": false,
|
86 |
"torch_dtype": "float16",
|
87 |
"transformers_version": "4.39.3",
|
88 |
+
"use_cache": true,
|
89 |
"vocab_size": 32768
|
90 |
}
|
model-00001-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ee7fe4d38e1c900d0e5e5a251bab06fac3c173df8818109e0af8e7f89a44b124
|
3 |
+
size 4985132728
|
model-00002-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:2fbe866a1d6c91cace9b51846bf298cbdd97c371e86475ab4ad52db3b8ad28ec
|
3 |
+
size 4999760248
|
model-00003-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:39992a82031ba7b33200137ed6a9b5786672e2c3d6bc05627a188b00ec2cdc95
|
3 |
+
size 4950686368
|
model-00004-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e799d93d64e39ea2808305fc9e5f043554e843bb7aec083722ab2f42a7c5876f
|
3 |
+
size 4955398880
|
model-00005-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:a0d2de9aaf7de39c018fc23ef4f9cb370de165c195d711778cfa117912f9e536
|
3 |
+
size 4999760800
|
model-00006-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9378fb98aa10078fd99764cefb73895acc1399f3ff7c1d342a58ca40bbaf01b
|
3 |
+
size 4950686728
|
model-00007-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:ed74705bb7f52550dc74e59464fabedc311fdebc728e925dddaba91d87e9de16
|
3 |
+
size 4999760800
|
model-00008-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:9b1a7379f8ffa1a847e7f320644d9d818d7c04ebcedeb22ab32d61d140bd4aa9
|
3 |
+
size 4950686728
|
model-00009-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:556b48d690c2598bcec7836a203e39af86126077f4618af669b52b8971e618c7
|
3 |
+
size 4999760800
|
model-00010-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bdb2b8a2b7721d090a77dc7095d4768526323f14c71b9a1698bf7c01b6b810a5
|
3 |
+
size 4957029456
|
model-00011-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:8189ea0ca0d64236746e00d2a32f50cd8cfb5063a052edbc5cb8c385113cf0ad
|
3 |
+
size 4999760800
|
model-00012-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:5614341acb2b11941f9a25cf6acb0a065c1d9b87f6c869c12eec4f0eecdc4ea0
|
3 |
+
size 4950686728
|
model-00013-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:bb7b10e681f268eb7753efd3adea3e64778a3b6485e970554386a6e330cc3dd9
|
3 |
+
size 4999760800
|
model-00014-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e9de71eac6df45a8dc50a7f1383faa5b8a6767fe126501ffa629e1dc8cea147b
|
3 |
+
size 4950686728
|
model-00015-of-00015.safetensors
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:054531e9c6efdeb6531a74d19e435df5f60a367677816caf4cd1b4ce45dff328
|
3 |
+
size 4107731232
|
model.safetensors.index.json
CHANGED
@@ -1,6 +1,6 @@
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
-
"total_size":
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00015-of-00015.safetensors",
|
@@ -125,11 +125,7 @@
|
|
125 |
"model.layers.0.block_sparse_moe.experts.7.w3.qweight": "model-00001-of-00015.safetensors",
|
126 |
"model.layers.0.block_sparse_moe.experts.7.w3.qzeros": "model-00001-of-00015.safetensors",
|
127 |
"model.layers.0.block_sparse_moe.experts.7.w3.scales": "model-00001-of-00015.safetensors",
|
128 |
-
"model.layers.0.block_sparse_moe.gate.
|
129 |
-
"model.layers.0.block_sparse_moe.gate.g_idx": "model-00001-of-00015.safetensors",
|
130 |
-
"model.layers.0.block_sparse_moe.gate.qweight": "model-00001-of-00015.safetensors",
|
131 |
-
"model.layers.0.block_sparse_moe.gate.qzeros": "model-00001-of-00015.safetensors",
|
132 |
-
"model.layers.0.block_sparse_moe.gate.scales": "model-00001-of-00015.safetensors",
|
133 |
"model.layers.0.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
134 |
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
135 |
"model.layers.0.self_attn.k_proj.bias": "model-00001-of-00015.safetensors",
|
@@ -272,10 +268,7 @@
|
|
272 |
"model.layers.1.block_sparse_moe.experts.7.w3.qweight": "model-00001-of-00015.safetensors",
|
273 |
"model.layers.1.block_sparse_moe.experts.7.w3.qzeros": "model-00001-of-00015.safetensors",
|
274 |
"model.layers.1.block_sparse_moe.experts.7.w3.scales": "model-00001-of-00015.safetensors",
|
275 |
-
"model.layers.1.block_sparse_moe.gate.
|
276 |
-
"model.layers.1.block_sparse_moe.gate.g_idx": "model-00001-of-00015.safetensors",
|
277 |
-
"model.layers.1.block_sparse_moe.gate.qweight": "model-00001-of-00015.safetensors",
|
278 |
-
"model.layers.1.block_sparse_moe.gate.scales": "model-00001-of-00015.safetensors",
|
279 |
"model.layers.1.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
280 |
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
281 |
"model.layers.1.self_attn.k_proj.bias": "model-00001-of-00015.safetensors",
|
@@ -418,10 +411,7 @@
|
|
418 |
"model.layers.10.block_sparse_moe.experts.7.w3.qweight": "model-00003-of-00015.safetensors",
|
419 |
"model.layers.10.block_sparse_moe.experts.7.w3.qzeros": "model-00003-of-00015.safetensors",
|
420 |
"model.layers.10.block_sparse_moe.experts.7.w3.scales": "model-00003-of-00015.safetensors",
|
421 |
-
"model.layers.10.block_sparse_moe.gate.
|
422 |
-
"model.layers.10.block_sparse_moe.gate.g_idx": "model-00003-of-00015.safetensors",
|
423 |
-
"model.layers.10.block_sparse_moe.gate.qweight": "model-00003-of-00015.safetensors",
|
424 |
-
"model.layers.10.block_sparse_moe.gate.scales": "model-00003-of-00015.safetensors",
|
425 |
"model.layers.10.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
426 |
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
427 |
"model.layers.10.self_attn.k_proj.bias": "model-00003-of-00015.safetensors",
|
@@ -564,10 +554,7 @@
|
|
564 |
"model.layers.11.block_sparse_moe.experts.7.w3.qweight": "model-00004-of-00015.safetensors",
|
565 |
"model.layers.11.block_sparse_moe.experts.7.w3.qzeros": "model-00004-of-00015.safetensors",
|
566 |
"model.layers.11.block_sparse_moe.experts.7.w3.scales": "model-00004-of-00015.safetensors",
|
567 |
-
"model.layers.11.block_sparse_moe.gate.
|
568 |
-
"model.layers.11.block_sparse_moe.gate.g_idx": "model-00004-of-00015.safetensors",
|
569 |
-
"model.layers.11.block_sparse_moe.gate.qweight": "model-00004-of-00015.safetensors",
|
570 |
-
"model.layers.11.block_sparse_moe.gate.scales": "model-00004-of-00015.safetensors",
|
571 |
"model.layers.11.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
572 |
"model.layers.11.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
573 |
"model.layers.11.self_attn.k_proj.bias": "model-00003-of-00015.safetensors",
|
@@ -710,10 +697,7 @@
|
|
710 |
"model.layers.12.block_sparse_moe.experts.7.w3.qweight": "model-00004-of-00015.safetensors",
|
711 |
"model.layers.12.block_sparse_moe.experts.7.w3.qzeros": "model-00004-of-00015.safetensors",
|
712 |
"model.layers.12.block_sparse_moe.experts.7.w3.scales": "model-00004-of-00015.safetensors",
|
713 |
-
"model.layers.12.block_sparse_moe.gate.
|
714 |
-
"model.layers.12.block_sparse_moe.gate.g_idx": "model-00004-of-00015.safetensors",
|
715 |
-
"model.layers.12.block_sparse_moe.gate.qweight": "model-00004-of-00015.safetensors",
|
716 |
-
"model.layers.12.block_sparse_moe.gate.scales": "model-00004-of-00015.safetensors",
|
717 |
"model.layers.12.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
718 |
"model.layers.12.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
719 |
"model.layers.12.self_attn.k_proj.bias": "model-00004-of-00015.safetensors",
|
@@ -856,10 +840,7 @@
|
|
856 |
"model.layers.13.block_sparse_moe.experts.7.w3.qweight": "model-00004-of-00015.safetensors",
|
857 |
"model.layers.13.block_sparse_moe.experts.7.w3.qzeros": "model-00004-of-00015.safetensors",
|
858 |
"model.layers.13.block_sparse_moe.experts.7.w3.scales": "model-00004-of-00015.safetensors",
|
859 |
-
"model.layers.13.block_sparse_moe.gate.
|
860 |
-
"model.layers.13.block_sparse_moe.gate.g_idx": "model-00004-of-00015.safetensors",
|
861 |
-
"model.layers.13.block_sparse_moe.gate.qweight": "model-00004-of-00015.safetensors",
|
862 |
-
"model.layers.13.block_sparse_moe.gate.scales": "model-00004-of-00015.safetensors",
|
863 |
"model.layers.13.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
864 |
"model.layers.13.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
865 |
"model.layers.13.self_attn.k_proj.bias": "model-00004-of-00015.safetensors",
|
@@ -1002,10 +983,7 @@
|
|
1002 |
"model.layers.14.block_sparse_moe.experts.7.w3.qweight": "model-00005-of-00015.safetensors",
|
1003 |
"model.layers.14.block_sparse_moe.experts.7.w3.qzeros": "model-00005-of-00015.safetensors",
|
1004 |
"model.layers.14.block_sparse_moe.experts.7.w3.scales": "model-00005-of-00015.safetensors",
|
1005 |
-
"model.layers.14.block_sparse_moe.gate.
|
1006 |
-
"model.layers.14.block_sparse_moe.gate.g_idx": "model-00005-of-00015.safetensors",
|
1007 |
-
"model.layers.14.block_sparse_moe.gate.qweight": "model-00005-of-00015.safetensors",
|
1008 |
-
"model.layers.14.block_sparse_moe.gate.scales": "model-00005-of-00015.safetensors",
|
1009 |
"model.layers.14.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
1010 |
"model.layers.14.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
1011 |
"model.layers.14.self_attn.k_proj.bias": "model-00004-of-00015.safetensors",
|
@@ -1148,11 +1126,7 @@
|
|
1148 |
"model.layers.15.block_sparse_moe.experts.7.w3.qweight": "model-00005-of-00015.safetensors",
|
1149 |
"model.layers.15.block_sparse_moe.experts.7.w3.qzeros": "model-00005-of-00015.safetensors",
|
1150 |
"model.layers.15.block_sparse_moe.experts.7.w3.scales": "model-00005-of-00015.safetensors",
|
1151 |
-
"model.layers.15.block_sparse_moe.gate.
|
1152 |
-
"model.layers.15.block_sparse_moe.gate.g_idx": "model-00005-of-00015.safetensors",
|
1153 |
-
"model.layers.15.block_sparse_moe.gate.qweight": "model-00005-of-00015.safetensors",
|
1154 |
-
"model.layers.15.block_sparse_moe.gate.qzeros": "model-00005-of-00015.safetensors",
|
1155 |
-
"model.layers.15.block_sparse_moe.gate.scales": "model-00005-of-00015.safetensors",
|
1156 |
"model.layers.15.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
1157 |
"model.layers.15.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
1158 |
"model.layers.15.self_attn.k_proj.bias": "model-00005-of-00015.safetensors",
|
@@ -1295,10 +1269,7 @@
|
|
1295 |
"model.layers.16.block_sparse_moe.experts.7.w3.qweight": "model-00005-of-00015.safetensors",
|
1296 |
"model.layers.16.block_sparse_moe.experts.7.w3.qzeros": "model-00005-of-00015.safetensors",
|
1297 |
"model.layers.16.block_sparse_moe.experts.7.w3.scales": "model-00005-of-00015.safetensors",
|
1298 |
-
"model.layers.16.block_sparse_moe.gate.
|
1299 |
-
"model.layers.16.block_sparse_moe.gate.g_idx": "model-00005-of-00015.safetensors",
|
1300 |
-
"model.layers.16.block_sparse_moe.gate.qweight": "model-00005-of-00015.safetensors",
|
1301 |
-
"model.layers.16.block_sparse_moe.gate.scales": "model-00005-of-00015.safetensors",
|
1302 |
"model.layers.16.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
1303 |
"model.layers.16.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
1304 |
"model.layers.16.self_attn.k_proj.bias": "model-00005-of-00015.safetensors",
|
@@ -1441,10 +1412,7 @@
|
|
1441 |
"model.layers.17.block_sparse_moe.experts.7.w3.qweight": "model-00005-of-00015.safetensors",
|
1442 |
"model.layers.17.block_sparse_moe.experts.7.w3.qzeros": "model-00005-of-00015.safetensors",
|
1443 |
"model.layers.17.block_sparse_moe.experts.7.w3.scales": "model-00005-of-00015.safetensors",
|
1444 |
-
"model.layers.17.block_sparse_moe.gate.
|
1445 |
-
"model.layers.17.block_sparse_moe.gate.g_idx": "model-00005-of-00015.safetensors",
|
1446 |
-
"model.layers.17.block_sparse_moe.gate.qweight": "model-00005-of-00015.safetensors",
|
1447 |
-
"model.layers.17.block_sparse_moe.gate.scales": "model-00005-of-00015.safetensors",
|
1448 |
"model.layers.17.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
1449 |
"model.layers.17.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
1450 |
"model.layers.17.self_attn.k_proj.bias": "model-00005-of-00015.safetensors",
|
@@ -1587,10 +1555,7 @@
|
|
1587 |
"model.layers.18.block_sparse_moe.experts.7.w3.qweight": "model-00006-of-00015.safetensors",
|
1588 |
"model.layers.18.block_sparse_moe.experts.7.w3.qzeros": "model-00006-of-00015.safetensors",
|
1589 |
"model.layers.18.block_sparse_moe.experts.7.w3.scales": "model-00006-of-00015.safetensors",
|
1590 |
-
"model.layers.18.block_sparse_moe.gate.
|
1591 |
-
"model.layers.18.block_sparse_moe.gate.g_idx": "model-00006-of-00015.safetensors",
|
1592 |
-
"model.layers.18.block_sparse_moe.gate.qweight": "model-00006-of-00015.safetensors",
|
1593 |
-
"model.layers.18.block_sparse_moe.gate.scales": "model-00006-of-00015.safetensors",
|
1594 |
"model.layers.18.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
1595 |
"model.layers.18.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
1596 |
"model.layers.18.self_attn.k_proj.bias": "model-00005-of-00015.safetensors",
|
@@ -1733,10 +1698,7 @@
|
|
1733 |
"model.layers.19.block_sparse_moe.experts.7.w3.qweight": "model-00006-of-00015.safetensors",
|
1734 |
"model.layers.19.block_sparse_moe.experts.7.w3.qzeros": "model-00006-of-00015.safetensors",
|
1735 |
"model.layers.19.block_sparse_moe.experts.7.w3.scales": "model-00006-of-00015.safetensors",
|
1736 |
-
"model.layers.19.block_sparse_moe.gate.
|
1737 |
-
"model.layers.19.block_sparse_moe.gate.g_idx": "model-00006-of-00015.safetensors",
|
1738 |
-
"model.layers.19.block_sparse_moe.gate.qweight": "model-00006-of-00015.safetensors",
|
1739 |
-
"model.layers.19.block_sparse_moe.gate.scales": "model-00006-of-00015.safetensors",
|
1740 |
"model.layers.19.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
1741 |
"model.layers.19.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
1742 |
"model.layers.19.self_attn.k_proj.bias": "model-00006-of-00015.safetensors",
|
@@ -1879,10 +1841,7 @@
|
|
1879 |
"model.layers.2.block_sparse_moe.experts.7.w3.qweight": "model-00001-of-00015.safetensors",
|
1880 |
"model.layers.2.block_sparse_moe.experts.7.w3.qzeros": "model-00001-of-00015.safetensors",
|
1881 |
"model.layers.2.block_sparse_moe.experts.7.w3.scales": "model-00001-of-00015.safetensors",
|
1882 |
-
"model.layers.2.block_sparse_moe.gate.
|
1883 |
-
"model.layers.2.block_sparse_moe.gate.g_idx": "model-00001-of-00015.safetensors",
|
1884 |
-
"model.layers.2.block_sparse_moe.gate.qweight": "model-00001-of-00015.safetensors",
|
1885 |
-
"model.layers.2.block_sparse_moe.gate.scales": "model-00001-of-00015.safetensors",
|
1886 |
"model.layers.2.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
1887 |
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
1888 |
"model.layers.2.self_attn.k_proj.bias": "model-00001-of-00015.safetensors",
|
@@ -2025,10 +1984,7 @@
|
|
2025 |
"model.layers.20.block_sparse_moe.experts.7.w3.qweight": "model-00006-of-00015.safetensors",
|
2026 |
"model.layers.20.block_sparse_moe.experts.7.w3.qzeros": "model-00006-of-00015.safetensors",
|
2027 |
"model.layers.20.block_sparse_moe.experts.7.w3.scales": "model-00006-of-00015.safetensors",
|
2028 |
-
"model.layers.20.block_sparse_moe.gate.
|
2029 |
-
"model.layers.20.block_sparse_moe.gate.g_idx": "model-00006-of-00015.safetensors",
|
2030 |
-
"model.layers.20.block_sparse_moe.gate.qweight": "model-00006-of-00015.safetensors",
|
2031 |
-
"model.layers.20.block_sparse_moe.gate.scales": "model-00006-of-00015.safetensors",
|
2032 |
"model.layers.20.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
2033 |
"model.layers.20.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
2034 |
"model.layers.20.self_attn.k_proj.bias": "model-00006-of-00015.safetensors",
|
@@ -2171,10 +2127,7 @@
|
|
2171 |
"model.layers.21.block_sparse_moe.experts.7.w3.qweight": "model-00006-of-00015.safetensors",
|
2172 |
"model.layers.21.block_sparse_moe.experts.7.w3.qzeros": "model-00006-of-00015.safetensors",
|
2173 |
"model.layers.21.block_sparse_moe.experts.7.w3.scales": "model-00006-of-00015.safetensors",
|
2174 |
-
"model.layers.21.block_sparse_moe.gate.
|
2175 |
-
"model.layers.21.block_sparse_moe.gate.g_idx": "model-00006-of-00015.safetensors",
|
2176 |
-
"model.layers.21.block_sparse_moe.gate.qweight": "model-00006-of-00015.safetensors",
|
2177 |
-
"model.layers.21.block_sparse_moe.gate.scales": "model-00006-of-00015.safetensors",
|
2178 |
"model.layers.21.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
2179 |
"model.layers.21.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
2180 |
"model.layers.21.self_attn.k_proj.bias": "model-00006-of-00015.safetensors",
|
@@ -2317,10 +2270,7 @@
|
|
2317 |
"model.layers.22.block_sparse_moe.experts.7.w3.qweight": "model-00007-of-00015.safetensors",
|
2318 |
"model.layers.22.block_sparse_moe.experts.7.w3.qzeros": "model-00007-of-00015.safetensors",
|
2319 |
"model.layers.22.block_sparse_moe.experts.7.w3.scales": "model-00007-of-00015.safetensors",
|
2320 |
-
"model.layers.22.block_sparse_moe.gate.
|
2321 |
-
"model.layers.22.block_sparse_moe.gate.g_idx": "model-00007-of-00015.safetensors",
|
2322 |
-
"model.layers.22.block_sparse_moe.gate.qweight": "model-00007-of-00015.safetensors",
|
2323 |
-
"model.layers.22.block_sparse_moe.gate.scales": "model-00007-of-00015.safetensors",
|
2324 |
"model.layers.22.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
2325 |
"model.layers.22.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
2326 |
"model.layers.22.self_attn.k_proj.bias": "model-00006-of-00015.safetensors",
|
@@ -2463,11 +2413,7 @@
|
|
2463 |
"model.layers.23.block_sparse_moe.experts.7.w3.qweight": "model-00007-of-00015.safetensors",
|
2464 |
"model.layers.23.block_sparse_moe.experts.7.w3.qzeros": "model-00007-of-00015.safetensors",
|
2465 |
"model.layers.23.block_sparse_moe.experts.7.w3.scales": "model-00007-of-00015.safetensors",
|
2466 |
-
"model.layers.23.block_sparse_moe.gate.
|
2467 |
-
"model.layers.23.block_sparse_moe.gate.g_idx": "model-00007-of-00015.safetensors",
|
2468 |
-
"model.layers.23.block_sparse_moe.gate.qweight": "model-00007-of-00015.safetensors",
|
2469 |
-
"model.layers.23.block_sparse_moe.gate.qzeros": "model-00007-of-00015.safetensors",
|
2470 |
-
"model.layers.23.block_sparse_moe.gate.scales": "model-00007-of-00015.safetensors",
|
2471 |
"model.layers.23.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
2472 |
"model.layers.23.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
2473 |
"model.layers.23.self_attn.k_proj.bias": "model-00007-of-00015.safetensors",
|
@@ -2610,10 +2556,7 @@
|
|
2610 |
"model.layers.24.block_sparse_moe.experts.7.w3.qweight": "model-00007-of-00015.safetensors",
|
2611 |
"model.layers.24.block_sparse_moe.experts.7.w3.qzeros": "model-00007-of-00015.safetensors",
|
2612 |
"model.layers.24.block_sparse_moe.experts.7.w3.scales": "model-00007-of-00015.safetensors",
|
2613 |
-
"model.layers.24.block_sparse_moe.gate.
|
2614 |
-
"model.layers.24.block_sparse_moe.gate.g_idx": "model-00007-of-00015.safetensors",
|
2615 |
-
"model.layers.24.block_sparse_moe.gate.qweight": "model-00007-of-00015.safetensors",
|
2616 |
-
"model.layers.24.block_sparse_moe.gate.scales": "model-00007-of-00015.safetensors",
|
2617 |
"model.layers.24.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
2618 |
"model.layers.24.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
2619 |
"model.layers.24.self_attn.k_proj.bias": "model-00007-of-00015.safetensors",
|
@@ -2756,10 +2699,7 @@
|
|
2756 |
"model.layers.25.block_sparse_moe.experts.7.w3.qweight": "model-00007-of-00015.safetensors",
|
2757 |
"model.layers.25.block_sparse_moe.experts.7.w3.qzeros": "model-00007-of-00015.safetensors",
|
2758 |
"model.layers.25.block_sparse_moe.experts.7.w3.scales": "model-00007-of-00015.safetensors",
|
2759 |
-
"model.layers.25.block_sparse_moe.gate.
|
2760 |
-
"model.layers.25.block_sparse_moe.gate.g_idx": "model-00007-of-00015.safetensors",
|
2761 |
-
"model.layers.25.block_sparse_moe.gate.qweight": "model-00007-of-00015.safetensors",
|
2762 |
-
"model.layers.25.block_sparse_moe.gate.scales": "model-00007-of-00015.safetensors",
|
2763 |
"model.layers.25.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
2764 |
"model.layers.25.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
2765 |
"model.layers.25.self_attn.k_proj.bias": "model-00007-of-00015.safetensors",
|
@@ -2902,10 +2842,7 @@
|
|
2902 |
"model.layers.26.block_sparse_moe.experts.7.w3.qweight": "model-00008-of-00015.safetensors",
|
2903 |
"model.layers.26.block_sparse_moe.experts.7.w3.qzeros": "model-00008-of-00015.safetensors",
|
2904 |
"model.layers.26.block_sparse_moe.experts.7.w3.scales": "model-00008-of-00015.safetensors",
|
2905 |
-
"model.layers.26.block_sparse_moe.gate.
|
2906 |
-
"model.layers.26.block_sparse_moe.gate.g_idx": "model-00008-of-00015.safetensors",
|
2907 |
-
"model.layers.26.block_sparse_moe.gate.qweight": "model-00008-of-00015.safetensors",
|
2908 |
-
"model.layers.26.block_sparse_moe.gate.scales": "model-00008-of-00015.safetensors",
|
2909 |
"model.layers.26.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
2910 |
"model.layers.26.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
2911 |
"model.layers.26.self_attn.k_proj.bias": "model-00007-of-00015.safetensors",
|
@@ -3048,10 +2985,7 @@
|
|
3048 |
"model.layers.27.block_sparse_moe.experts.7.w3.qweight": "model-00008-of-00015.safetensors",
|
3049 |
"model.layers.27.block_sparse_moe.experts.7.w3.qzeros": "model-00008-of-00015.safetensors",
|
3050 |
"model.layers.27.block_sparse_moe.experts.7.w3.scales": "model-00008-of-00015.safetensors",
|
3051 |
-
"model.layers.27.block_sparse_moe.gate.
|
3052 |
-
"model.layers.27.block_sparse_moe.gate.g_idx": "model-00008-of-00015.safetensors",
|
3053 |
-
"model.layers.27.block_sparse_moe.gate.qweight": "model-00008-of-00015.safetensors",
|
3054 |
-
"model.layers.27.block_sparse_moe.gate.scales": "model-00008-of-00015.safetensors",
|
3055 |
"model.layers.27.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
3056 |
"model.layers.27.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
3057 |
"model.layers.27.self_attn.k_proj.bias": "model-00008-of-00015.safetensors",
|
@@ -3194,10 +3128,7 @@
|
|
3194 |
"model.layers.28.block_sparse_moe.experts.7.w3.qweight": "model-00008-of-00015.safetensors",
|
3195 |
"model.layers.28.block_sparse_moe.experts.7.w3.qzeros": "model-00008-of-00015.safetensors",
|
3196 |
"model.layers.28.block_sparse_moe.experts.7.w3.scales": "model-00008-of-00015.safetensors",
|
3197 |
-
"model.layers.28.block_sparse_moe.gate.
|
3198 |
-
"model.layers.28.block_sparse_moe.gate.g_idx": "model-00008-of-00015.safetensors",
|
3199 |
-
"model.layers.28.block_sparse_moe.gate.qweight": "model-00008-of-00015.safetensors",
|
3200 |
-
"model.layers.28.block_sparse_moe.gate.scales": "model-00008-of-00015.safetensors",
|
3201 |
"model.layers.28.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
3202 |
"model.layers.28.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
3203 |
"model.layers.28.self_attn.k_proj.bias": "model-00008-of-00015.safetensors",
|
@@ -3340,10 +3271,7 @@
|
|
3340 |
"model.layers.29.block_sparse_moe.experts.7.w3.qweight": "model-00008-of-00015.safetensors",
|
3341 |
"model.layers.29.block_sparse_moe.experts.7.w3.qzeros": "model-00008-of-00015.safetensors",
|
3342 |
"model.layers.29.block_sparse_moe.experts.7.w3.scales": "model-00008-of-00015.safetensors",
|
3343 |
-
"model.layers.29.block_sparse_moe.gate.
|
3344 |
-
"model.layers.29.block_sparse_moe.gate.g_idx": "model-00008-of-00015.safetensors",
|
3345 |
-
"model.layers.29.block_sparse_moe.gate.qweight": "model-00008-of-00015.safetensors",
|
3346 |
-
"model.layers.29.block_sparse_moe.gate.scales": "model-00008-of-00015.safetensors",
|
3347 |
"model.layers.29.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
3348 |
"model.layers.29.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
3349 |
"model.layers.29.self_attn.k_proj.bias": "model-00008-of-00015.safetensors",
|
@@ -3486,10 +3414,7 @@
|
|
3486 |
"model.layers.3.block_sparse_moe.experts.7.w3.qweight": "model-00002-of-00015.safetensors",
|
3487 |
"model.layers.3.block_sparse_moe.experts.7.w3.qzeros": "model-00002-of-00015.safetensors",
|
3488 |
"model.layers.3.block_sparse_moe.experts.7.w3.scales": "model-00002-of-00015.safetensors",
|
3489 |
-
"model.layers.3.block_sparse_moe.gate.
|
3490 |
-
"model.layers.3.block_sparse_moe.gate.g_idx": "model-00002-of-00015.safetensors",
|
3491 |
-
"model.layers.3.block_sparse_moe.gate.qweight": "model-00002-of-00015.safetensors",
|
3492 |
-
"model.layers.3.block_sparse_moe.gate.scales": "model-00002-of-00015.safetensors",
|
3493 |
"model.layers.3.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
3494 |
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
3495 |
"model.layers.3.self_attn.k_proj.bias": "model-00001-of-00015.safetensors",
|
@@ -3632,10 +3557,7 @@
|
|
3632 |
"model.layers.30.block_sparse_moe.experts.7.w3.qweight": "model-00009-of-00015.safetensors",
|
3633 |
"model.layers.30.block_sparse_moe.experts.7.w3.qzeros": "model-00009-of-00015.safetensors",
|
3634 |
"model.layers.30.block_sparse_moe.experts.7.w3.scales": "model-00009-of-00015.safetensors",
|
3635 |
-
"model.layers.30.block_sparse_moe.gate.
|
3636 |
-
"model.layers.30.block_sparse_moe.gate.g_idx": "model-00009-of-00015.safetensors",
|
3637 |
-
"model.layers.30.block_sparse_moe.gate.qweight": "model-00009-of-00015.safetensors",
|
3638 |
-
"model.layers.30.block_sparse_moe.gate.scales": "model-00009-of-00015.safetensors",
|
3639 |
"model.layers.30.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
3640 |
"model.layers.30.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
3641 |
"model.layers.30.self_attn.k_proj.bias": "model-00008-of-00015.safetensors",
|
@@ -3778,11 +3700,7 @@
|
|
3778 |
"model.layers.31.block_sparse_moe.experts.7.w3.qweight": "model-00009-of-00015.safetensors",
|
3779 |
"model.layers.31.block_sparse_moe.experts.7.w3.qzeros": "model-00009-of-00015.safetensors",
|
3780 |
"model.layers.31.block_sparse_moe.experts.7.w3.scales": "model-00009-of-00015.safetensors",
|
3781 |
-
"model.layers.31.block_sparse_moe.gate.
|
3782 |
-
"model.layers.31.block_sparse_moe.gate.g_idx": "model-00009-of-00015.safetensors",
|
3783 |
-
"model.layers.31.block_sparse_moe.gate.qweight": "model-00009-of-00015.safetensors",
|
3784 |
-
"model.layers.31.block_sparse_moe.gate.qzeros": "model-00009-of-00015.safetensors",
|
3785 |
-
"model.layers.31.block_sparse_moe.gate.scales": "model-00009-of-00015.safetensors",
|
3786 |
"model.layers.31.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
3787 |
"model.layers.31.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
3788 |
"model.layers.31.self_attn.k_proj.bias": "model-00009-of-00015.safetensors",
|
@@ -3925,10 +3843,7 @@
|
|
3925 |
"model.layers.32.block_sparse_moe.experts.7.w3.qweight": "model-00009-of-00015.safetensors",
|
3926 |
"model.layers.32.block_sparse_moe.experts.7.w3.qzeros": "model-00009-of-00015.safetensors",
|
3927 |
"model.layers.32.block_sparse_moe.experts.7.w3.scales": "model-00009-of-00015.safetensors",
|
3928 |
-
"model.layers.32.block_sparse_moe.gate.
|
3929 |
-
"model.layers.32.block_sparse_moe.gate.g_idx": "model-00009-of-00015.safetensors",
|
3930 |
-
"model.layers.32.block_sparse_moe.gate.qweight": "model-00009-of-00015.safetensors",
|
3931 |
-
"model.layers.32.block_sparse_moe.gate.scales": "model-00009-of-00015.safetensors",
|
3932 |
"model.layers.32.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
3933 |
"model.layers.32.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
3934 |
"model.layers.32.self_attn.k_proj.bias": "model-00009-of-00015.safetensors",
|
@@ -4071,10 +3986,7 @@
|
|
4071 |
"model.layers.33.block_sparse_moe.experts.7.w3.qweight": "model-00009-of-00015.safetensors",
|
4072 |
"model.layers.33.block_sparse_moe.experts.7.w3.qzeros": "model-00009-of-00015.safetensors",
|
4073 |
"model.layers.33.block_sparse_moe.experts.7.w3.scales": "model-00009-of-00015.safetensors",
|
4074 |
-
"model.layers.33.block_sparse_moe.gate.
|
4075 |
-
"model.layers.33.block_sparse_moe.gate.g_idx": "model-00009-of-00015.safetensors",
|
4076 |
-
"model.layers.33.block_sparse_moe.gate.qweight": "model-00009-of-00015.safetensors",
|
4077 |
-
"model.layers.33.block_sparse_moe.gate.scales": "model-00009-of-00015.safetensors",
|
4078 |
"model.layers.33.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
4079 |
"model.layers.33.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
4080 |
"model.layers.33.self_attn.k_proj.bias": "model-00009-of-00015.safetensors",
|
@@ -4217,10 +4129,7 @@
|
|
4217 |
"model.layers.34.block_sparse_moe.experts.7.w3.qweight": "model-00010-of-00015.safetensors",
|
4218 |
"model.layers.34.block_sparse_moe.experts.7.w3.qzeros": "model-00010-of-00015.safetensors",
|
4219 |
"model.layers.34.block_sparse_moe.experts.7.w3.scales": "model-00010-of-00015.safetensors",
|
4220 |
-
"model.layers.34.block_sparse_moe.gate.
|
4221 |
-
"model.layers.34.block_sparse_moe.gate.g_idx": "model-00010-of-00015.safetensors",
|
4222 |
-
"model.layers.34.block_sparse_moe.gate.qweight": "model-00010-of-00015.safetensors",
|
4223 |
-
"model.layers.34.block_sparse_moe.gate.scales": "model-00010-of-00015.safetensors",
|
4224 |
"model.layers.34.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
4225 |
"model.layers.34.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
4226 |
"model.layers.34.self_attn.k_proj.bias": "model-00009-of-00015.safetensors",
|
@@ -4363,10 +4272,7 @@
|
|
4363 |
"model.layers.35.block_sparse_moe.experts.7.w3.qweight": "model-00010-of-00015.safetensors",
|
4364 |
"model.layers.35.block_sparse_moe.experts.7.w3.qzeros": "model-00010-of-00015.safetensors",
|
4365 |
"model.layers.35.block_sparse_moe.experts.7.w3.scales": "model-00010-of-00015.safetensors",
|
4366 |
-
"model.layers.35.block_sparse_moe.gate.
|
4367 |
-
"model.layers.35.block_sparse_moe.gate.g_idx": "model-00010-of-00015.safetensors",
|
4368 |
-
"model.layers.35.block_sparse_moe.gate.qweight": "model-00010-of-00015.safetensors",
|
4369 |
-
"model.layers.35.block_sparse_moe.gate.scales": "model-00010-of-00015.safetensors",
|
4370 |
"model.layers.35.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
4371 |
"model.layers.35.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
4372 |
"model.layers.35.self_attn.k_proj.bias": "model-00010-of-00015.safetensors",
|
@@ -4509,10 +4415,7 @@
|
|
4509 |
"model.layers.36.block_sparse_moe.experts.7.w3.qweight": "model-00010-of-00015.safetensors",
|
4510 |
"model.layers.36.block_sparse_moe.experts.7.w3.qzeros": "model-00010-of-00015.safetensors",
|
4511 |
"model.layers.36.block_sparse_moe.experts.7.w3.scales": "model-00010-of-00015.safetensors",
|
4512 |
-
"model.layers.36.block_sparse_moe.gate.
|
4513 |
-
"model.layers.36.block_sparse_moe.gate.g_idx": "model-00010-of-00015.safetensors",
|
4514 |
-
"model.layers.36.block_sparse_moe.gate.qweight": "model-00010-of-00015.safetensors",
|
4515 |
-
"model.layers.36.block_sparse_moe.gate.scales": "model-00010-of-00015.safetensors",
|
4516 |
"model.layers.36.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
4517 |
"model.layers.36.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
4518 |
"model.layers.36.self_attn.k_proj.bias": "model-00010-of-00015.safetensors",
|
@@ -4655,10 +4558,7 @@
|
|
4655 |
"model.layers.37.block_sparse_moe.experts.7.w3.qweight": "model-00011-of-00015.safetensors",
|
4656 |
"model.layers.37.block_sparse_moe.experts.7.w3.qzeros": "model-00011-of-00015.safetensors",
|
4657 |
"model.layers.37.block_sparse_moe.experts.7.w3.scales": "model-00011-of-00015.safetensors",
|
4658 |
-
"model.layers.37.block_sparse_moe.gate.
|
4659 |
-
"model.layers.37.block_sparse_moe.gate.g_idx": "model-00011-of-00015.safetensors",
|
4660 |
-
"model.layers.37.block_sparse_moe.gate.qweight": "model-00011-of-00015.safetensors",
|
4661 |
-
"model.layers.37.block_sparse_moe.gate.scales": "model-00011-of-00015.safetensors",
|
4662 |
"model.layers.37.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
4663 |
"model.layers.37.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
4664 |
"model.layers.37.self_attn.k_proj.bias": "model-00010-of-00015.safetensors",
|
@@ -4801,10 +4701,7 @@
|
|
4801 |
"model.layers.38.block_sparse_moe.experts.7.w3.qweight": "model-00011-of-00015.safetensors",
|
4802 |
"model.layers.38.block_sparse_moe.experts.7.w3.qzeros": "model-00011-of-00015.safetensors",
|
4803 |
"model.layers.38.block_sparse_moe.experts.7.w3.scales": "model-00011-of-00015.safetensors",
|
4804 |
-
"model.layers.38.block_sparse_moe.gate.
|
4805 |
-
"model.layers.38.block_sparse_moe.gate.g_idx": "model-00011-of-00015.safetensors",
|
4806 |
-
"model.layers.38.block_sparse_moe.gate.qweight": "model-00011-of-00015.safetensors",
|
4807 |
-
"model.layers.38.block_sparse_moe.gate.scales": "model-00011-of-00015.safetensors",
|
4808 |
"model.layers.38.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
4809 |
"model.layers.38.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
4810 |
"model.layers.38.self_attn.k_proj.bias": "model-00011-of-00015.safetensors",
|
@@ -4947,11 +4844,7 @@
|
|
4947 |
"model.layers.39.block_sparse_moe.experts.7.w3.qweight": "model-00011-of-00015.safetensors",
|
4948 |
"model.layers.39.block_sparse_moe.experts.7.w3.qzeros": "model-00011-of-00015.safetensors",
|
4949 |
"model.layers.39.block_sparse_moe.experts.7.w3.scales": "model-00011-of-00015.safetensors",
|
4950 |
-
"model.layers.39.block_sparse_moe.gate.
|
4951 |
-
"model.layers.39.block_sparse_moe.gate.g_idx": "model-00011-of-00015.safetensors",
|
4952 |
-
"model.layers.39.block_sparse_moe.gate.qweight": "model-00011-of-00015.safetensors",
|
4953 |
-
"model.layers.39.block_sparse_moe.gate.qzeros": "model-00011-of-00015.safetensors",
|
4954 |
-
"model.layers.39.block_sparse_moe.gate.scales": "model-00011-of-00015.safetensors",
|
4955 |
"model.layers.39.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
4956 |
"model.layers.39.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
4957 |
"model.layers.39.self_attn.k_proj.bias": "model-00011-of-00015.safetensors",
|
@@ -5094,10 +4987,7 @@
|
|
5094 |
"model.layers.4.block_sparse_moe.experts.7.w3.qweight": "model-00002-of-00015.safetensors",
|
5095 |
"model.layers.4.block_sparse_moe.experts.7.w3.qzeros": "model-00002-of-00015.safetensors",
|
5096 |
"model.layers.4.block_sparse_moe.experts.7.w3.scales": "model-00002-of-00015.safetensors",
|
5097 |
-
"model.layers.4.block_sparse_moe.gate.
|
5098 |
-
"model.layers.4.block_sparse_moe.gate.g_idx": "model-00002-of-00015.safetensors",
|
5099 |
-
"model.layers.4.block_sparse_moe.gate.qweight": "model-00002-of-00015.safetensors",
|
5100 |
-
"model.layers.4.block_sparse_moe.gate.scales": "model-00002-of-00015.safetensors",
|
5101 |
"model.layers.4.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
5102 |
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
5103 |
"model.layers.4.self_attn.k_proj.bias": "model-00002-of-00015.safetensors",
|
@@ -5240,10 +5130,7 @@
|
|
5240 |
"model.layers.40.block_sparse_moe.experts.7.w3.qweight": "model-00011-of-00015.safetensors",
|
5241 |
"model.layers.40.block_sparse_moe.experts.7.w3.qzeros": "model-00011-of-00015.safetensors",
|
5242 |
"model.layers.40.block_sparse_moe.experts.7.w3.scales": "model-00011-of-00015.safetensors",
|
5243 |
-
"model.layers.40.block_sparse_moe.gate.
|
5244 |
-
"model.layers.40.block_sparse_moe.gate.g_idx": "model-00011-of-00015.safetensors",
|
5245 |
-
"model.layers.40.block_sparse_moe.gate.qweight": "model-00011-of-00015.safetensors",
|
5246 |
-
"model.layers.40.block_sparse_moe.gate.scales": "model-00011-of-00015.safetensors",
|
5247 |
"model.layers.40.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
5248 |
"model.layers.40.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
5249 |
"model.layers.40.self_attn.k_proj.bias": "model-00011-of-00015.safetensors",
|
@@ -5386,10 +5273,7 @@
|
|
5386 |
"model.layers.41.block_sparse_moe.experts.7.w3.qweight": "model-00012-of-00015.safetensors",
|
5387 |
"model.layers.41.block_sparse_moe.experts.7.w3.qzeros": "model-00012-of-00015.safetensors",
|
5388 |
"model.layers.41.block_sparse_moe.experts.7.w3.scales": "model-00012-of-00015.safetensors",
|
5389 |
-
"model.layers.41.block_sparse_moe.gate.
|
5390 |
-
"model.layers.41.block_sparse_moe.gate.g_idx": "model-00012-of-00015.safetensors",
|
5391 |
-
"model.layers.41.block_sparse_moe.gate.qweight": "model-00012-of-00015.safetensors",
|
5392 |
-
"model.layers.41.block_sparse_moe.gate.scales": "model-00012-of-00015.safetensors",
|
5393 |
"model.layers.41.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
5394 |
"model.layers.41.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
5395 |
"model.layers.41.self_attn.k_proj.bias": "model-00011-of-00015.safetensors",
|
@@ -5532,10 +5416,7 @@
|
|
5532 |
"model.layers.42.block_sparse_moe.experts.7.w3.qweight": "model-00012-of-00015.safetensors",
|
5533 |
"model.layers.42.block_sparse_moe.experts.7.w3.qzeros": "model-00012-of-00015.safetensors",
|
5534 |
"model.layers.42.block_sparse_moe.experts.7.w3.scales": "model-00012-of-00015.safetensors",
|
5535 |
-
"model.layers.42.block_sparse_moe.gate.
|
5536 |
-
"model.layers.42.block_sparse_moe.gate.g_idx": "model-00012-of-00015.safetensors",
|
5537 |
-
"model.layers.42.block_sparse_moe.gate.qweight": "model-00012-of-00015.safetensors",
|
5538 |
-
"model.layers.42.block_sparse_moe.gate.scales": "model-00012-of-00015.safetensors",
|
5539 |
"model.layers.42.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
5540 |
"model.layers.42.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
5541 |
"model.layers.42.self_attn.k_proj.bias": "model-00012-of-00015.safetensors",
|
@@ -5678,10 +5559,7 @@
|
|
5678 |
"model.layers.43.block_sparse_moe.experts.7.w3.qweight": "model-00012-of-00015.safetensors",
|
5679 |
"model.layers.43.block_sparse_moe.experts.7.w3.qzeros": "model-00012-of-00015.safetensors",
|
5680 |
"model.layers.43.block_sparse_moe.experts.7.w3.scales": "model-00012-of-00015.safetensors",
|
5681 |
-
"model.layers.43.block_sparse_moe.gate.
|
5682 |
-
"model.layers.43.block_sparse_moe.gate.g_idx": "model-00012-of-00015.safetensors",
|
5683 |
-
"model.layers.43.block_sparse_moe.gate.qweight": "model-00012-of-00015.safetensors",
|
5684 |
-
"model.layers.43.block_sparse_moe.gate.scales": "model-00012-of-00015.safetensors",
|
5685 |
"model.layers.43.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
5686 |
"model.layers.43.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
5687 |
"model.layers.43.self_attn.k_proj.bias": "model-00012-of-00015.safetensors",
|
@@ -5824,10 +5702,7 @@
|
|
5824 |
"model.layers.44.block_sparse_moe.experts.7.w3.qweight": "model-00012-of-00015.safetensors",
|
5825 |
"model.layers.44.block_sparse_moe.experts.7.w3.qzeros": "model-00012-of-00015.safetensors",
|
5826 |
"model.layers.44.block_sparse_moe.experts.7.w3.scales": "model-00012-of-00015.safetensors",
|
5827 |
-
"model.layers.44.block_sparse_moe.gate.
|
5828 |
-
"model.layers.44.block_sparse_moe.gate.g_idx": "model-00012-of-00015.safetensors",
|
5829 |
-
"model.layers.44.block_sparse_moe.gate.qweight": "model-00012-of-00015.safetensors",
|
5830 |
-
"model.layers.44.block_sparse_moe.gate.scales": "model-00012-of-00015.safetensors",
|
5831 |
"model.layers.44.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
5832 |
"model.layers.44.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
5833 |
"model.layers.44.self_attn.k_proj.bias": "model-00012-of-00015.safetensors",
|
@@ -5970,10 +5845,7 @@
|
|
5970 |
"model.layers.45.block_sparse_moe.experts.7.w3.qweight": "model-00013-of-00015.safetensors",
|
5971 |
"model.layers.45.block_sparse_moe.experts.7.w3.qzeros": "model-00013-of-00015.safetensors",
|
5972 |
"model.layers.45.block_sparse_moe.experts.7.w3.scales": "model-00013-of-00015.safetensors",
|
5973 |
-
"model.layers.45.block_sparse_moe.gate.
|
5974 |
-
"model.layers.45.block_sparse_moe.gate.g_idx": "model-00013-of-00015.safetensors",
|
5975 |
-
"model.layers.45.block_sparse_moe.gate.qweight": "model-00013-of-00015.safetensors",
|
5976 |
-
"model.layers.45.block_sparse_moe.gate.scales": "model-00013-of-00015.safetensors",
|
5977 |
"model.layers.45.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
5978 |
"model.layers.45.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
5979 |
"model.layers.45.self_attn.k_proj.bias": "model-00012-of-00015.safetensors",
|
@@ -6116,10 +5988,7 @@
|
|
6116 |
"model.layers.46.block_sparse_moe.experts.7.w3.qweight": "model-00013-of-00015.safetensors",
|
6117 |
"model.layers.46.block_sparse_moe.experts.7.w3.qzeros": "model-00013-of-00015.safetensors",
|
6118 |
"model.layers.46.block_sparse_moe.experts.7.w3.scales": "model-00013-of-00015.safetensors",
|
6119 |
-
"model.layers.46.block_sparse_moe.gate.
|
6120 |
-
"model.layers.46.block_sparse_moe.gate.g_idx": "model-00013-of-00015.safetensors",
|
6121 |
-
"model.layers.46.block_sparse_moe.gate.qweight": "model-00013-of-00015.safetensors",
|
6122 |
-
"model.layers.46.block_sparse_moe.gate.scales": "model-00013-of-00015.safetensors",
|
6123 |
"model.layers.46.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
6124 |
"model.layers.46.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
6125 |
"model.layers.46.self_attn.k_proj.bias": "model-00013-of-00015.safetensors",
|
@@ -6262,11 +6131,7 @@
|
|
6262 |
"model.layers.47.block_sparse_moe.experts.7.w3.qweight": "model-00013-of-00015.safetensors",
|
6263 |
"model.layers.47.block_sparse_moe.experts.7.w3.qzeros": "model-00013-of-00015.safetensors",
|
6264 |
"model.layers.47.block_sparse_moe.experts.7.w3.scales": "model-00013-of-00015.safetensors",
|
6265 |
-
"model.layers.47.block_sparse_moe.gate.
|
6266 |
-
"model.layers.47.block_sparse_moe.gate.g_idx": "model-00013-of-00015.safetensors",
|
6267 |
-
"model.layers.47.block_sparse_moe.gate.qweight": "model-00013-of-00015.safetensors",
|
6268 |
-
"model.layers.47.block_sparse_moe.gate.qzeros": "model-00013-of-00015.safetensors",
|
6269 |
-
"model.layers.47.block_sparse_moe.gate.scales": "model-00013-of-00015.safetensors",
|
6270 |
"model.layers.47.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
6271 |
"model.layers.47.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
6272 |
"model.layers.47.self_attn.k_proj.bias": "model-00013-of-00015.safetensors",
|
@@ -6409,10 +6274,7 @@
|
|
6409 |
"model.layers.48.block_sparse_moe.experts.7.w3.qweight": "model-00013-of-00015.safetensors",
|
6410 |
"model.layers.48.block_sparse_moe.experts.7.w3.qzeros": "model-00013-of-00015.safetensors",
|
6411 |
"model.layers.48.block_sparse_moe.experts.7.w3.scales": "model-00013-of-00015.safetensors",
|
6412 |
-
"model.layers.48.block_sparse_moe.gate.
|
6413 |
-
"model.layers.48.block_sparse_moe.gate.g_idx": "model-00013-of-00015.safetensors",
|
6414 |
-
"model.layers.48.block_sparse_moe.gate.qweight": "model-00013-of-00015.safetensors",
|
6415 |
-
"model.layers.48.block_sparse_moe.gate.scales": "model-00013-of-00015.safetensors",
|
6416 |
"model.layers.48.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
6417 |
"model.layers.48.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
6418 |
"model.layers.48.self_attn.k_proj.bias": "model-00013-of-00015.safetensors",
|
@@ -6555,10 +6417,7 @@
|
|
6555 |
"model.layers.49.block_sparse_moe.experts.7.w3.qweight": "model-00014-of-00015.safetensors",
|
6556 |
"model.layers.49.block_sparse_moe.experts.7.w3.qzeros": "model-00014-of-00015.safetensors",
|
6557 |
"model.layers.49.block_sparse_moe.experts.7.w3.scales": "model-00014-of-00015.safetensors",
|
6558 |
-
"model.layers.49.block_sparse_moe.gate.
|
6559 |
-
"model.layers.49.block_sparse_moe.gate.g_idx": "model-00014-of-00015.safetensors",
|
6560 |
-
"model.layers.49.block_sparse_moe.gate.qweight": "model-00014-of-00015.safetensors",
|
6561 |
-
"model.layers.49.block_sparse_moe.gate.scales": "model-00014-of-00015.safetensors",
|
6562 |
"model.layers.49.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
6563 |
"model.layers.49.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
6564 |
"model.layers.49.self_attn.k_proj.bias": "model-00013-of-00015.safetensors",
|
@@ -6701,10 +6560,7 @@
|
|
6701 |
"model.layers.5.block_sparse_moe.experts.7.w3.qweight": "model-00002-of-00015.safetensors",
|
6702 |
"model.layers.5.block_sparse_moe.experts.7.w3.qzeros": "model-00002-of-00015.safetensors",
|
6703 |
"model.layers.5.block_sparse_moe.experts.7.w3.scales": "model-00002-of-00015.safetensors",
|
6704 |
-
"model.layers.5.block_sparse_moe.gate.
|
6705 |
-
"model.layers.5.block_sparse_moe.gate.g_idx": "model-00002-of-00015.safetensors",
|
6706 |
-
"model.layers.5.block_sparse_moe.gate.qweight": "model-00002-of-00015.safetensors",
|
6707 |
-
"model.layers.5.block_sparse_moe.gate.scales": "model-00002-of-00015.safetensors",
|
6708 |
"model.layers.5.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
6709 |
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
6710 |
"model.layers.5.self_attn.k_proj.bias": "model-00002-of-00015.safetensors",
|
@@ -6847,10 +6703,7 @@
|
|
6847 |
"model.layers.50.block_sparse_moe.experts.7.w3.qweight": "model-00014-of-00015.safetensors",
|
6848 |
"model.layers.50.block_sparse_moe.experts.7.w3.qzeros": "model-00014-of-00015.safetensors",
|
6849 |
"model.layers.50.block_sparse_moe.experts.7.w3.scales": "model-00014-of-00015.safetensors",
|
6850 |
-
"model.layers.50.block_sparse_moe.gate.
|
6851 |
-
"model.layers.50.block_sparse_moe.gate.g_idx": "model-00014-of-00015.safetensors",
|
6852 |
-
"model.layers.50.block_sparse_moe.gate.qweight": "model-00014-of-00015.safetensors",
|
6853 |
-
"model.layers.50.block_sparse_moe.gate.scales": "model-00014-of-00015.safetensors",
|
6854 |
"model.layers.50.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
6855 |
"model.layers.50.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
6856 |
"model.layers.50.self_attn.k_proj.bias": "model-00014-of-00015.safetensors",
|
@@ -6993,10 +6846,7 @@
|
|
6993 |
"model.layers.51.block_sparse_moe.experts.7.w3.qweight": "model-00014-of-00015.safetensors",
|
6994 |
"model.layers.51.block_sparse_moe.experts.7.w3.qzeros": "model-00014-of-00015.safetensors",
|
6995 |
"model.layers.51.block_sparse_moe.experts.7.w3.scales": "model-00014-of-00015.safetensors",
|
6996 |
-
"model.layers.51.block_sparse_moe.gate.
|
6997 |
-
"model.layers.51.block_sparse_moe.gate.g_idx": "model-00014-of-00015.safetensors",
|
6998 |
-
"model.layers.51.block_sparse_moe.gate.qweight": "model-00014-of-00015.safetensors",
|
6999 |
-
"model.layers.51.block_sparse_moe.gate.scales": "model-00014-of-00015.safetensors",
|
7000 |
"model.layers.51.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
7001 |
"model.layers.51.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
7002 |
"model.layers.51.self_attn.k_proj.bias": "model-00014-of-00015.safetensors",
|
@@ -7139,10 +6989,7 @@
|
|
7139 |
"model.layers.52.block_sparse_moe.experts.7.w3.qweight": "model-00014-of-00015.safetensors",
|
7140 |
"model.layers.52.block_sparse_moe.experts.7.w3.qzeros": "model-00014-of-00015.safetensors",
|
7141 |
"model.layers.52.block_sparse_moe.experts.7.w3.scales": "model-00014-of-00015.safetensors",
|
7142 |
-
"model.layers.52.block_sparse_moe.gate.
|
7143 |
-
"model.layers.52.block_sparse_moe.gate.g_idx": "model-00014-of-00015.safetensors",
|
7144 |
-
"model.layers.52.block_sparse_moe.gate.qweight": "model-00014-of-00015.safetensors",
|
7145 |
-
"model.layers.52.block_sparse_moe.gate.scales": "model-00014-of-00015.safetensors",
|
7146 |
"model.layers.52.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
7147 |
"model.layers.52.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
7148 |
"model.layers.52.self_attn.k_proj.bias": "model-00014-of-00015.safetensors",
|
@@ -7285,10 +7132,7 @@
|
|
7285 |
"model.layers.53.block_sparse_moe.experts.7.w3.qweight": "model-00015-of-00015.safetensors",
|
7286 |
"model.layers.53.block_sparse_moe.experts.7.w3.qzeros": "model-00015-of-00015.safetensors",
|
7287 |
"model.layers.53.block_sparse_moe.experts.7.w3.scales": "model-00015-of-00015.safetensors",
|
7288 |
-
"model.layers.53.block_sparse_moe.gate.
|
7289 |
-
"model.layers.53.block_sparse_moe.gate.g_idx": "model-00015-of-00015.safetensors",
|
7290 |
-
"model.layers.53.block_sparse_moe.gate.qweight": "model-00015-of-00015.safetensors",
|
7291 |
-
"model.layers.53.block_sparse_moe.gate.scales": "model-00015-of-00015.safetensors",
|
7292 |
"model.layers.53.input_layernorm.weight": "model-00015-of-00015.safetensors",
|
7293 |
"model.layers.53.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
|
7294 |
"model.layers.53.self_attn.k_proj.bias": "model-00014-of-00015.safetensors",
|
@@ -7431,10 +7275,7 @@
|
|
7431 |
"model.layers.54.block_sparse_moe.experts.7.w3.qweight": "model-00015-of-00015.safetensors",
|
7432 |
"model.layers.54.block_sparse_moe.experts.7.w3.qzeros": "model-00015-of-00015.safetensors",
|
7433 |
"model.layers.54.block_sparse_moe.experts.7.w3.scales": "model-00015-of-00015.safetensors",
|
7434 |
-
"model.layers.54.block_sparse_moe.gate.
|
7435 |
-
"model.layers.54.block_sparse_moe.gate.g_idx": "model-00015-of-00015.safetensors",
|
7436 |
-
"model.layers.54.block_sparse_moe.gate.qweight": "model-00015-of-00015.safetensors",
|
7437 |
-
"model.layers.54.block_sparse_moe.gate.scales": "model-00015-of-00015.safetensors",
|
7438 |
"model.layers.54.input_layernorm.weight": "model-00015-of-00015.safetensors",
|
7439 |
"model.layers.54.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
|
7440 |
"model.layers.54.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
|
@@ -7577,11 +7418,7 @@
|
|
7577 |
"model.layers.55.block_sparse_moe.experts.7.w3.qweight": "model-00015-of-00015.safetensors",
|
7578 |
"model.layers.55.block_sparse_moe.experts.7.w3.qzeros": "model-00015-of-00015.safetensors",
|
7579 |
"model.layers.55.block_sparse_moe.experts.7.w3.scales": "model-00015-of-00015.safetensors",
|
7580 |
-
"model.layers.55.block_sparse_moe.gate.
|
7581 |
-
"model.layers.55.block_sparse_moe.gate.g_idx": "model-00015-of-00015.safetensors",
|
7582 |
-
"model.layers.55.block_sparse_moe.gate.qweight": "model-00015-of-00015.safetensors",
|
7583 |
-
"model.layers.55.block_sparse_moe.gate.qzeros": "model-00015-of-00015.safetensors",
|
7584 |
-
"model.layers.55.block_sparse_moe.gate.scales": "model-00015-of-00015.safetensors",
|
7585 |
"model.layers.55.input_layernorm.weight": "model-00015-of-00015.safetensors",
|
7586 |
"model.layers.55.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
|
7587 |
"model.layers.55.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
|
@@ -7724,10 +7561,7 @@
|
|
7724 |
"model.layers.6.block_sparse_moe.experts.7.w3.qweight": "model-00002-of-00015.safetensors",
|
7725 |
"model.layers.6.block_sparse_moe.experts.7.w3.qzeros": "model-00002-of-00015.safetensors",
|
7726 |
"model.layers.6.block_sparse_moe.experts.7.w3.scales": "model-00002-of-00015.safetensors",
|
7727 |
-
"model.layers.6.block_sparse_moe.gate.
|
7728 |
-
"model.layers.6.block_sparse_moe.gate.g_idx": "model-00002-of-00015.safetensors",
|
7729 |
-
"model.layers.6.block_sparse_moe.gate.qweight": "model-00002-of-00015.safetensors",
|
7730 |
-
"model.layers.6.block_sparse_moe.gate.scales": "model-00002-of-00015.safetensors",
|
7731 |
"model.layers.6.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
7732 |
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
7733 |
"model.layers.6.self_attn.k_proj.bias": "model-00002-of-00015.safetensors",
|
@@ -7870,11 +7704,7 @@
|
|
7870 |
"model.layers.7.block_sparse_moe.experts.7.w3.qweight": "model-00003-of-00015.safetensors",
|
7871 |
"model.layers.7.block_sparse_moe.experts.7.w3.qzeros": "model-00003-of-00015.safetensors",
|
7872 |
"model.layers.7.block_sparse_moe.experts.7.w3.scales": "model-00003-of-00015.safetensors",
|
7873 |
-
"model.layers.7.block_sparse_moe.gate.
|
7874 |
-
"model.layers.7.block_sparse_moe.gate.g_idx": "model-00003-of-00015.safetensors",
|
7875 |
-
"model.layers.7.block_sparse_moe.gate.qweight": "model-00003-of-00015.safetensors",
|
7876 |
-
"model.layers.7.block_sparse_moe.gate.qzeros": "model-00003-of-00015.safetensors",
|
7877 |
-
"model.layers.7.block_sparse_moe.gate.scales": "model-00003-of-00015.safetensors",
|
7878 |
"model.layers.7.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
7879 |
"model.layers.7.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
7880 |
"model.layers.7.self_attn.k_proj.bias": "model-00002-of-00015.safetensors",
|
@@ -8017,10 +7847,7 @@
|
|
8017 |
"model.layers.8.block_sparse_moe.experts.7.w3.qweight": "model-00003-of-00015.safetensors",
|
8018 |
"model.layers.8.block_sparse_moe.experts.7.w3.qzeros": "model-00003-of-00015.safetensors",
|
8019 |
"model.layers.8.block_sparse_moe.experts.7.w3.scales": "model-00003-of-00015.safetensors",
|
8020 |
-
"model.layers.8.block_sparse_moe.gate.
|
8021 |
-
"model.layers.8.block_sparse_moe.gate.g_idx": "model-00003-of-00015.safetensors",
|
8022 |
-
"model.layers.8.block_sparse_moe.gate.qweight": "model-00003-of-00015.safetensors",
|
8023 |
-
"model.layers.8.block_sparse_moe.gate.scales": "model-00003-of-00015.safetensors",
|
8024 |
"model.layers.8.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
8025 |
"model.layers.8.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
8026 |
"model.layers.8.self_attn.k_proj.bias": "model-00003-of-00015.safetensors",
|
@@ -8163,10 +7990,7 @@
|
|
8163 |
"model.layers.9.block_sparse_moe.experts.7.w3.qweight": "model-00003-of-00015.safetensors",
|
8164 |
"model.layers.9.block_sparse_moe.experts.7.w3.qzeros": "model-00003-of-00015.safetensors",
|
8165 |
"model.layers.9.block_sparse_moe.experts.7.w3.scales": "model-00003-of-00015.safetensors",
|
8166 |
-
"model.layers.9.block_sparse_moe.gate.
|
8167 |
-
"model.layers.9.block_sparse_moe.gate.g_idx": "model-00003-of-00015.safetensors",
|
8168 |
-
"model.layers.9.block_sparse_moe.gate.qweight": "model-00003-of-00015.safetensors",
|
8169 |
-
"model.layers.9.block_sparse_moe.gate.scales": "model-00003-of-00015.safetensors",
|
8170 |
"model.layers.9.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
8171 |
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
8172 |
"model.layers.9.self_attn.k_proj.bias": "model-00003-of-00015.safetensors",
|
|
|
1 |
{
|
2 |
"metadata": {
|
3 |
+
"total_size": 73756291072
|
4 |
},
|
5 |
"weight_map": {
|
6 |
"lm_head.weight": "model-00015-of-00015.safetensors",
|
|
|
125 |
"model.layers.0.block_sparse_moe.experts.7.w3.qweight": "model-00001-of-00015.safetensors",
|
126 |
"model.layers.0.block_sparse_moe.experts.7.w3.qzeros": "model-00001-of-00015.safetensors",
|
127 |
"model.layers.0.block_sparse_moe.experts.7.w3.scales": "model-00001-of-00015.safetensors",
|
128 |
+
"model.layers.0.block_sparse_moe.gate.weight": "model-00001-of-00015.safetensors",
|
|
|
|
|
|
|
|
|
129 |
"model.layers.0.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
130 |
"model.layers.0.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
131 |
"model.layers.0.self_attn.k_proj.bias": "model-00001-of-00015.safetensors",
|
|
|
268 |
"model.layers.1.block_sparse_moe.experts.7.w3.qweight": "model-00001-of-00015.safetensors",
|
269 |
"model.layers.1.block_sparse_moe.experts.7.w3.qzeros": "model-00001-of-00015.safetensors",
|
270 |
"model.layers.1.block_sparse_moe.experts.7.w3.scales": "model-00001-of-00015.safetensors",
|
271 |
+
"model.layers.1.block_sparse_moe.gate.weight": "model-00001-of-00015.safetensors",
|
|
|
|
|
|
|
272 |
"model.layers.1.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
273 |
"model.layers.1.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
274 |
"model.layers.1.self_attn.k_proj.bias": "model-00001-of-00015.safetensors",
|
|
|
411 |
"model.layers.10.block_sparse_moe.experts.7.w3.qweight": "model-00003-of-00015.safetensors",
|
412 |
"model.layers.10.block_sparse_moe.experts.7.w3.qzeros": "model-00003-of-00015.safetensors",
|
413 |
"model.layers.10.block_sparse_moe.experts.7.w3.scales": "model-00003-of-00015.safetensors",
|
414 |
+
"model.layers.10.block_sparse_moe.gate.weight": "model-00003-of-00015.safetensors",
|
|
|
|
|
|
|
415 |
"model.layers.10.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
416 |
"model.layers.10.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
417 |
"model.layers.10.self_attn.k_proj.bias": "model-00003-of-00015.safetensors",
|
|
|
554 |
"model.layers.11.block_sparse_moe.experts.7.w3.qweight": "model-00004-of-00015.safetensors",
|
555 |
"model.layers.11.block_sparse_moe.experts.7.w3.qzeros": "model-00004-of-00015.safetensors",
|
556 |
"model.layers.11.block_sparse_moe.experts.7.w3.scales": "model-00004-of-00015.safetensors",
|
557 |
+
"model.layers.11.block_sparse_moe.gate.weight": "model-00003-of-00015.safetensors",
|
|
|
|
|
|
|
558 |
"model.layers.11.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
559 |
"model.layers.11.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
560 |
"model.layers.11.self_attn.k_proj.bias": "model-00003-of-00015.safetensors",
|
|
|
697 |
"model.layers.12.block_sparse_moe.experts.7.w3.qweight": "model-00004-of-00015.safetensors",
|
698 |
"model.layers.12.block_sparse_moe.experts.7.w3.qzeros": "model-00004-of-00015.safetensors",
|
699 |
"model.layers.12.block_sparse_moe.experts.7.w3.scales": "model-00004-of-00015.safetensors",
|
700 |
+
"model.layers.12.block_sparse_moe.gate.weight": "model-00004-of-00015.safetensors",
|
|
|
|
|
|
|
701 |
"model.layers.12.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
702 |
"model.layers.12.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
703 |
"model.layers.12.self_attn.k_proj.bias": "model-00004-of-00015.safetensors",
|
|
|
840 |
"model.layers.13.block_sparse_moe.experts.7.w3.qweight": "model-00004-of-00015.safetensors",
|
841 |
"model.layers.13.block_sparse_moe.experts.7.w3.qzeros": "model-00004-of-00015.safetensors",
|
842 |
"model.layers.13.block_sparse_moe.experts.7.w3.scales": "model-00004-of-00015.safetensors",
|
843 |
+
"model.layers.13.block_sparse_moe.gate.weight": "model-00004-of-00015.safetensors",
|
|
|
|
|
|
|
844 |
"model.layers.13.input_layernorm.weight": "model-00004-of-00015.safetensors",
|
845 |
"model.layers.13.post_attention_layernorm.weight": "model-00004-of-00015.safetensors",
|
846 |
"model.layers.13.self_attn.k_proj.bias": "model-00004-of-00015.safetensors",
|
|
|
983 |
"model.layers.14.block_sparse_moe.experts.7.w3.qweight": "model-00005-of-00015.safetensors",
|
984 |
"model.layers.14.block_sparse_moe.experts.7.w3.qzeros": "model-00005-of-00015.safetensors",
|
985 |
"model.layers.14.block_sparse_moe.experts.7.w3.scales": "model-00005-of-00015.safetensors",
|
986 |
+
"model.layers.14.block_sparse_moe.gate.weight": "model-00004-of-00015.safetensors",
|
|
|
|
|
|
|
987 |
"model.layers.14.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
988 |
"model.layers.14.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
989 |
"model.layers.14.self_attn.k_proj.bias": "model-00004-of-00015.safetensors",
|
|
|
1126 |
"model.layers.15.block_sparse_moe.experts.7.w3.qweight": "model-00005-of-00015.safetensors",
|
1127 |
"model.layers.15.block_sparse_moe.experts.7.w3.qzeros": "model-00005-of-00015.safetensors",
|
1128 |
"model.layers.15.block_sparse_moe.experts.7.w3.scales": "model-00005-of-00015.safetensors",
|
1129 |
+
"model.layers.15.block_sparse_moe.gate.weight": "model-00005-of-00015.safetensors",
|
|
|
|
|
|
|
|
|
1130 |
"model.layers.15.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
1131 |
"model.layers.15.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
1132 |
"model.layers.15.self_attn.k_proj.bias": "model-00005-of-00015.safetensors",
|
|
|
1269 |
"model.layers.16.block_sparse_moe.experts.7.w3.qweight": "model-00005-of-00015.safetensors",
|
1270 |
"model.layers.16.block_sparse_moe.experts.7.w3.qzeros": "model-00005-of-00015.safetensors",
|
1271 |
"model.layers.16.block_sparse_moe.experts.7.w3.scales": "model-00005-of-00015.safetensors",
|
1272 |
+
"model.layers.16.block_sparse_moe.gate.weight": "model-00005-of-00015.safetensors",
|
|
|
|
|
|
|
1273 |
"model.layers.16.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
1274 |
"model.layers.16.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
1275 |
"model.layers.16.self_attn.k_proj.bias": "model-00005-of-00015.safetensors",
|
|
|
1412 |
"model.layers.17.block_sparse_moe.experts.7.w3.qweight": "model-00005-of-00015.safetensors",
|
1413 |
"model.layers.17.block_sparse_moe.experts.7.w3.qzeros": "model-00005-of-00015.safetensors",
|
1414 |
"model.layers.17.block_sparse_moe.experts.7.w3.scales": "model-00005-of-00015.safetensors",
|
1415 |
+
"model.layers.17.block_sparse_moe.gate.weight": "model-00005-of-00015.safetensors",
|
|
|
|
|
|
|
1416 |
"model.layers.17.input_layernorm.weight": "model-00005-of-00015.safetensors",
|
1417 |
"model.layers.17.post_attention_layernorm.weight": "model-00005-of-00015.safetensors",
|
1418 |
"model.layers.17.self_attn.k_proj.bias": "model-00005-of-00015.safetensors",
|
|
|
1555 |
"model.layers.18.block_sparse_moe.experts.7.w3.qweight": "model-00006-of-00015.safetensors",
|
1556 |
"model.layers.18.block_sparse_moe.experts.7.w3.qzeros": "model-00006-of-00015.safetensors",
|
1557 |
"model.layers.18.block_sparse_moe.experts.7.w3.scales": "model-00006-of-00015.safetensors",
|
1558 |
+
"model.layers.18.block_sparse_moe.gate.weight": "model-00005-of-00015.safetensors",
|
|
|
|
|
|
|
1559 |
"model.layers.18.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
1560 |
"model.layers.18.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
1561 |
"model.layers.18.self_attn.k_proj.bias": "model-00005-of-00015.safetensors",
|
|
|
1698 |
"model.layers.19.block_sparse_moe.experts.7.w3.qweight": "model-00006-of-00015.safetensors",
|
1699 |
"model.layers.19.block_sparse_moe.experts.7.w3.qzeros": "model-00006-of-00015.safetensors",
|
1700 |
"model.layers.19.block_sparse_moe.experts.7.w3.scales": "model-00006-of-00015.safetensors",
|
1701 |
+
"model.layers.19.block_sparse_moe.gate.weight": "model-00006-of-00015.safetensors",
|
|
|
|
|
|
|
1702 |
"model.layers.19.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
1703 |
"model.layers.19.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
1704 |
"model.layers.19.self_attn.k_proj.bias": "model-00006-of-00015.safetensors",
|
|
|
1841 |
"model.layers.2.block_sparse_moe.experts.7.w3.qweight": "model-00001-of-00015.safetensors",
|
1842 |
"model.layers.2.block_sparse_moe.experts.7.w3.qzeros": "model-00001-of-00015.safetensors",
|
1843 |
"model.layers.2.block_sparse_moe.experts.7.w3.scales": "model-00001-of-00015.safetensors",
|
1844 |
+
"model.layers.2.block_sparse_moe.gate.weight": "model-00001-of-00015.safetensors",
|
|
|
|
|
|
|
1845 |
"model.layers.2.input_layernorm.weight": "model-00001-of-00015.safetensors",
|
1846 |
"model.layers.2.post_attention_layernorm.weight": "model-00001-of-00015.safetensors",
|
1847 |
"model.layers.2.self_attn.k_proj.bias": "model-00001-of-00015.safetensors",
|
|
|
1984 |
"model.layers.20.block_sparse_moe.experts.7.w3.qweight": "model-00006-of-00015.safetensors",
|
1985 |
"model.layers.20.block_sparse_moe.experts.7.w3.qzeros": "model-00006-of-00015.safetensors",
|
1986 |
"model.layers.20.block_sparse_moe.experts.7.w3.scales": "model-00006-of-00015.safetensors",
|
1987 |
+
"model.layers.20.block_sparse_moe.gate.weight": "model-00006-of-00015.safetensors",
|
|
|
|
|
|
|
1988 |
"model.layers.20.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
1989 |
"model.layers.20.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
1990 |
"model.layers.20.self_attn.k_proj.bias": "model-00006-of-00015.safetensors",
|
|
|
2127 |
"model.layers.21.block_sparse_moe.experts.7.w3.qweight": "model-00006-of-00015.safetensors",
|
2128 |
"model.layers.21.block_sparse_moe.experts.7.w3.qzeros": "model-00006-of-00015.safetensors",
|
2129 |
"model.layers.21.block_sparse_moe.experts.7.w3.scales": "model-00006-of-00015.safetensors",
|
2130 |
+
"model.layers.21.block_sparse_moe.gate.weight": "model-00006-of-00015.safetensors",
|
|
|
|
|
|
|
2131 |
"model.layers.21.input_layernorm.weight": "model-00006-of-00015.safetensors",
|
2132 |
"model.layers.21.post_attention_layernorm.weight": "model-00006-of-00015.safetensors",
|
2133 |
"model.layers.21.self_attn.k_proj.bias": "model-00006-of-00015.safetensors",
|
|
|
2270 |
"model.layers.22.block_sparse_moe.experts.7.w3.qweight": "model-00007-of-00015.safetensors",
|
2271 |
"model.layers.22.block_sparse_moe.experts.7.w3.qzeros": "model-00007-of-00015.safetensors",
|
2272 |
"model.layers.22.block_sparse_moe.experts.7.w3.scales": "model-00007-of-00015.safetensors",
|
2273 |
+
"model.layers.22.block_sparse_moe.gate.weight": "model-00006-of-00015.safetensors",
|
|
|
|
|
|
|
2274 |
"model.layers.22.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
2275 |
"model.layers.22.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
2276 |
"model.layers.22.self_attn.k_proj.bias": "model-00006-of-00015.safetensors",
|
|
|
2413 |
"model.layers.23.block_sparse_moe.experts.7.w3.qweight": "model-00007-of-00015.safetensors",
|
2414 |
"model.layers.23.block_sparse_moe.experts.7.w3.qzeros": "model-00007-of-00015.safetensors",
|
2415 |
"model.layers.23.block_sparse_moe.experts.7.w3.scales": "model-00007-of-00015.safetensors",
|
2416 |
+
"model.layers.23.block_sparse_moe.gate.weight": "model-00007-of-00015.safetensors",
|
|
|
|
|
|
|
|
|
2417 |
"model.layers.23.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
2418 |
"model.layers.23.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
2419 |
"model.layers.23.self_attn.k_proj.bias": "model-00007-of-00015.safetensors",
|
|
|
2556 |
"model.layers.24.block_sparse_moe.experts.7.w3.qweight": "model-00007-of-00015.safetensors",
|
2557 |
"model.layers.24.block_sparse_moe.experts.7.w3.qzeros": "model-00007-of-00015.safetensors",
|
2558 |
"model.layers.24.block_sparse_moe.experts.7.w3.scales": "model-00007-of-00015.safetensors",
|
2559 |
+
"model.layers.24.block_sparse_moe.gate.weight": "model-00007-of-00015.safetensors",
|
|
|
|
|
|
|
2560 |
"model.layers.24.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
2561 |
"model.layers.24.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
2562 |
"model.layers.24.self_attn.k_proj.bias": "model-00007-of-00015.safetensors",
|
|
|
2699 |
"model.layers.25.block_sparse_moe.experts.7.w3.qweight": "model-00007-of-00015.safetensors",
|
2700 |
"model.layers.25.block_sparse_moe.experts.7.w3.qzeros": "model-00007-of-00015.safetensors",
|
2701 |
"model.layers.25.block_sparse_moe.experts.7.w3.scales": "model-00007-of-00015.safetensors",
|
2702 |
+
"model.layers.25.block_sparse_moe.gate.weight": "model-00007-of-00015.safetensors",
|
|
|
|
|
|
|
2703 |
"model.layers.25.input_layernorm.weight": "model-00007-of-00015.safetensors",
|
2704 |
"model.layers.25.post_attention_layernorm.weight": "model-00007-of-00015.safetensors",
|
2705 |
"model.layers.25.self_attn.k_proj.bias": "model-00007-of-00015.safetensors",
|
|
|
2842 |
"model.layers.26.block_sparse_moe.experts.7.w3.qweight": "model-00008-of-00015.safetensors",
|
2843 |
"model.layers.26.block_sparse_moe.experts.7.w3.qzeros": "model-00008-of-00015.safetensors",
|
2844 |
"model.layers.26.block_sparse_moe.experts.7.w3.scales": "model-00008-of-00015.safetensors",
|
2845 |
+
"model.layers.26.block_sparse_moe.gate.weight": "model-00007-of-00015.safetensors",
|
|
|
|
|
|
|
2846 |
"model.layers.26.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
2847 |
"model.layers.26.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
2848 |
"model.layers.26.self_attn.k_proj.bias": "model-00007-of-00015.safetensors",
|
|
|
2985 |
"model.layers.27.block_sparse_moe.experts.7.w3.qweight": "model-00008-of-00015.safetensors",
|
2986 |
"model.layers.27.block_sparse_moe.experts.7.w3.qzeros": "model-00008-of-00015.safetensors",
|
2987 |
"model.layers.27.block_sparse_moe.experts.7.w3.scales": "model-00008-of-00015.safetensors",
|
2988 |
+
"model.layers.27.block_sparse_moe.gate.weight": "model-00008-of-00015.safetensors",
|
|
|
|
|
|
|
2989 |
"model.layers.27.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
2990 |
"model.layers.27.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
2991 |
"model.layers.27.self_attn.k_proj.bias": "model-00008-of-00015.safetensors",
|
|
|
3128 |
"model.layers.28.block_sparse_moe.experts.7.w3.qweight": "model-00008-of-00015.safetensors",
|
3129 |
"model.layers.28.block_sparse_moe.experts.7.w3.qzeros": "model-00008-of-00015.safetensors",
|
3130 |
"model.layers.28.block_sparse_moe.experts.7.w3.scales": "model-00008-of-00015.safetensors",
|
3131 |
+
"model.layers.28.block_sparse_moe.gate.weight": "model-00008-of-00015.safetensors",
|
|
|
|
|
|
|
3132 |
"model.layers.28.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
3133 |
"model.layers.28.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
3134 |
"model.layers.28.self_attn.k_proj.bias": "model-00008-of-00015.safetensors",
|
|
|
3271 |
"model.layers.29.block_sparse_moe.experts.7.w3.qweight": "model-00008-of-00015.safetensors",
|
3272 |
"model.layers.29.block_sparse_moe.experts.7.w3.qzeros": "model-00008-of-00015.safetensors",
|
3273 |
"model.layers.29.block_sparse_moe.experts.7.w3.scales": "model-00008-of-00015.safetensors",
|
3274 |
+
"model.layers.29.block_sparse_moe.gate.weight": "model-00008-of-00015.safetensors",
|
|
|
|
|
|
|
3275 |
"model.layers.29.input_layernorm.weight": "model-00008-of-00015.safetensors",
|
3276 |
"model.layers.29.post_attention_layernorm.weight": "model-00008-of-00015.safetensors",
|
3277 |
"model.layers.29.self_attn.k_proj.bias": "model-00008-of-00015.safetensors",
|
|
|
3414 |
"model.layers.3.block_sparse_moe.experts.7.w3.qweight": "model-00002-of-00015.safetensors",
|
3415 |
"model.layers.3.block_sparse_moe.experts.7.w3.qzeros": "model-00002-of-00015.safetensors",
|
3416 |
"model.layers.3.block_sparse_moe.experts.7.w3.scales": "model-00002-of-00015.safetensors",
|
3417 |
+
"model.layers.3.block_sparse_moe.gate.weight": "model-00001-of-00015.safetensors",
|
|
|
|
|
|
|
3418 |
"model.layers.3.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
3419 |
"model.layers.3.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
3420 |
"model.layers.3.self_attn.k_proj.bias": "model-00001-of-00015.safetensors",
|
|
|
3557 |
"model.layers.30.block_sparse_moe.experts.7.w3.qweight": "model-00009-of-00015.safetensors",
|
3558 |
"model.layers.30.block_sparse_moe.experts.7.w3.qzeros": "model-00009-of-00015.safetensors",
|
3559 |
"model.layers.30.block_sparse_moe.experts.7.w3.scales": "model-00009-of-00015.safetensors",
|
3560 |
+
"model.layers.30.block_sparse_moe.gate.weight": "model-00008-of-00015.safetensors",
|
|
|
|
|
|
|
3561 |
"model.layers.30.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
3562 |
"model.layers.30.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
3563 |
"model.layers.30.self_attn.k_proj.bias": "model-00008-of-00015.safetensors",
|
|
|
3700 |
"model.layers.31.block_sparse_moe.experts.7.w3.qweight": "model-00009-of-00015.safetensors",
|
3701 |
"model.layers.31.block_sparse_moe.experts.7.w3.qzeros": "model-00009-of-00015.safetensors",
|
3702 |
"model.layers.31.block_sparse_moe.experts.7.w3.scales": "model-00009-of-00015.safetensors",
|
3703 |
+
"model.layers.31.block_sparse_moe.gate.weight": "model-00009-of-00015.safetensors",
|
|
|
|
|
|
|
|
|
3704 |
"model.layers.31.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
3705 |
"model.layers.31.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
3706 |
"model.layers.31.self_attn.k_proj.bias": "model-00009-of-00015.safetensors",
|
|
|
3843 |
"model.layers.32.block_sparse_moe.experts.7.w3.qweight": "model-00009-of-00015.safetensors",
|
3844 |
"model.layers.32.block_sparse_moe.experts.7.w3.qzeros": "model-00009-of-00015.safetensors",
|
3845 |
"model.layers.32.block_sparse_moe.experts.7.w3.scales": "model-00009-of-00015.safetensors",
|
3846 |
+
"model.layers.32.block_sparse_moe.gate.weight": "model-00009-of-00015.safetensors",
|
|
|
|
|
|
|
3847 |
"model.layers.32.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
3848 |
"model.layers.32.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
3849 |
"model.layers.32.self_attn.k_proj.bias": "model-00009-of-00015.safetensors",
|
|
|
3986 |
"model.layers.33.block_sparse_moe.experts.7.w3.qweight": "model-00009-of-00015.safetensors",
|
3987 |
"model.layers.33.block_sparse_moe.experts.7.w3.qzeros": "model-00009-of-00015.safetensors",
|
3988 |
"model.layers.33.block_sparse_moe.experts.7.w3.scales": "model-00009-of-00015.safetensors",
|
3989 |
+
"model.layers.33.block_sparse_moe.gate.weight": "model-00009-of-00015.safetensors",
|
|
|
|
|
|
|
3990 |
"model.layers.33.input_layernorm.weight": "model-00009-of-00015.safetensors",
|
3991 |
"model.layers.33.post_attention_layernorm.weight": "model-00009-of-00015.safetensors",
|
3992 |
"model.layers.33.self_attn.k_proj.bias": "model-00009-of-00015.safetensors",
|
|
|
4129 |
"model.layers.34.block_sparse_moe.experts.7.w3.qweight": "model-00010-of-00015.safetensors",
|
4130 |
"model.layers.34.block_sparse_moe.experts.7.w3.qzeros": "model-00010-of-00015.safetensors",
|
4131 |
"model.layers.34.block_sparse_moe.experts.7.w3.scales": "model-00010-of-00015.safetensors",
|
4132 |
+
"model.layers.34.block_sparse_moe.gate.weight": "model-00009-of-00015.safetensors",
|
|
|
|
|
|
|
4133 |
"model.layers.34.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
4134 |
"model.layers.34.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
4135 |
"model.layers.34.self_attn.k_proj.bias": "model-00009-of-00015.safetensors",
|
|
|
4272 |
"model.layers.35.block_sparse_moe.experts.7.w3.qweight": "model-00010-of-00015.safetensors",
|
4273 |
"model.layers.35.block_sparse_moe.experts.7.w3.qzeros": "model-00010-of-00015.safetensors",
|
4274 |
"model.layers.35.block_sparse_moe.experts.7.w3.scales": "model-00010-of-00015.safetensors",
|
4275 |
+
"model.layers.35.block_sparse_moe.gate.weight": "model-00010-of-00015.safetensors",
|
|
|
|
|
|
|
4276 |
"model.layers.35.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
4277 |
"model.layers.35.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
4278 |
"model.layers.35.self_attn.k_proj.bias": "model-00010-of-00015.safetensors",
|
|
|
4415 |
"model.layers.36.block_sparse_moe.experts.7.w3.qweight": "model-00010-of-00015.safetensors",
|
4416 |
"model.layers.36.block_sparse_moe.experts.7.w3.qzeros": "model-00010-of-00015.safetensors",
|
4417 |
"model.layers.36.block_sparse_moe.experts.7.w3.scales": "model-00010-of-00015.safetensors",
|
4418 |
+
"model.layers.36.block_sparse_moe.gate.weight": "model-00010-of-00015.safetensors",
|
|
|
|
|
|
|
4419 |
"model.layers.36.input_layernorm.weight": "model-00010-of-00015.safetensors",
|
4420 |
"model.layers.36.post_attention_layernorm.weight": "model-00010-of-00015.safetensors",
|
4421 |
"model.layers.36.self_attn.k_proj.bias": "model-00010-of-00015.safetensors",
|
|
|
4558 |
"model.layers.37.block_sparse_moe.experts.7.w3.qweight": "model-00011-of-00015.safetensors",
|
4559 |
"model.layers.37.block_sparse_moe.experts.7.w3.qzeros": "model-00011-of-00015.safetensors",
|
4560 |
"model.layers.37.block_sparse_moe.experts.7.w3.scales": "model-00011-of-00015.safetensors",
|
4561 |
+
"model.layers.37.block_sparse_moe.gate.weight": "model-00010-of-00015.safetensors",
|
|
|
|
|
|
|
4562 |
"model.layers.37.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
4563 |
"model.layers.37.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
4564 |
"model.layers.37.self_attn.k_proj.bias": "model-00010-of-00015.safetensors",
|
|
|
4701 |
"model.layers.38.block_sparse_moe.experts.7.w3.qweight": "model-00011-of-00015.safetensors",
|
4702 |
"model.layers.38.block_sparse_moe.experts.7.w3.qzeros": "model-00011-of-00015.safetensors",
|
4703 |
"model.layers.38.block_sparse_moe.experts.7.w3.scales": "model-00011-of-00015.safetensors",
|
4704 |
+
"model.layers.38.block_sparse_moe.gate.weight": "model-00011-of-00015.safetensors",
|
|
|
|
|
|
|
4705 |
"model.layers.38.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
4706 |
"model.layers.38.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
4707 |
"model.layers.38.self_attn.k_proj.bias": "model-00011-of-00015.safetensors",
|
|
|
4844 |
"model.layers.39.block_sparse_moe.experts.7.w3.qweight": "model-00011-of-00015.safetensors",
|
4845 |
"model.layers.39.block_sparse_moe.experts.7.w3.qzeros": "model-00011-of-00015.safetensors",
|
4846 |
"model.layers.39.block_sparse_moe.experts.7.w3.scales": "model-00011-of-00015.safetensors",
|
4847 |
+
"model.layers.39.block_sparse_moe.gate.weight": "model-00011-of-00015.safetensors",
|
|
|
|
|
|
|
|
|
4848 |
"model.layers.39.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
4849 |
"model.layers.39.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
4850 |
"model.layers.39.self_attn.k_proj.bias": "model-00011-of-00015.safetensors",
|
|
|
4987 |
"model.layers.4.block_sparse_moe.experts.7.w3.qweight": "model-00002-of-00015.safetensors",
|
4988 |
"model.layers.4.block_sparse_moe.experts.7.w3.qzeros": "model-00002-of-00015.safetensors",
|
4989 |
"model.layers.4.block_sparse_moe.experts.7.w3.scales": "model-00002-of-00015.safetensors",
|
4990 |
+
"model.layers.4.block_sparse_moe.gate.weight": "model-00002-of-00015.safetensors",
|
|
|
|
|
|
|
4991 |
"model.layers.4.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
4992 |
"model.layers.4.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
4993 |
"model.layers.4.self_attn.k_proj.bias": "model-00002-of-00015.safetensors",
|
|
|
5130 |
"model.layers.40.block_sparse_moe.experts.7.w3.qweight": "model-00011-of-00015.safetensors",
|
5131 |
"model.layers.40.block_sparse_moe.experts.7.w3.qzeros": "model-00011-of-00015.safetensors",
|
5132 |
"model.layers.40.block_sparse_moe.experts.7.w3.scales": "model-00011-of-00015.safetensors",
|
5133 |
+
"model.layers.40.block_sparse_moe.gate.weight": "model-00011-of-00015.safetensors",
|
|
|
|
|
|
|
5134 |
"model.layers.40.input_layernorm.weight": "model-00011-of-00015.safetensors",
|
5135 |
"model.layers.40.post_attention_layernorm.weight": "model-00011-of-00015.safetensors",
|
5136 |
"model.layers.40.self_attn.k_proj.bias": "model-00011-of-00015.safetensors",
|
|
|
5273 |
"model.layers.41.block_sparse_moe.experts.7.w3.qweight": "model-00012-of-00015.safetensors",
|
5274 |
"model.layers.41.block_sparse_moe.experts.7.w3.qzeros": "model-00012-of-00015.safetensors",
|
5275 |
"model.layers.41.block_sparse_moe.experts.7.w3.scales": "model-00012-of-00015.safetensors",
|
5276 |
+
"model.layers.41.block_sparse_moe.gate.weight": "model-00011-of-00015.safetensors",
|
|
|
|
|
|
|
5277 |
"model.layers.41.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
5278 |
"model.layers.41.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
5279 |
"model.layers.41.self_attn.k_proj.bias": "model-00011-of-00015.safetensors",
|
|
|
5416 |
"model.layers.42.block_sparse_moe.experts.7.w3.qweight": "model-00012-of-00015.safetensors",
|
5417 |
"model.layers.42.block_sparse_moe.experts.7.w3.qzeros": "model-00012-of-00015.safetensors",
|
5418 |
"model.layers.42.block_sparse_moe.experts.7.w3.scales": "model-00012-of-00015.safetensors",
|
5419 |
+
"model.layers.42.block_sparse_moe.gate.weight": "model-00012-of-00015.safetensors",
|
|
|
|
|
|
|
5420 |
"model.layers.42.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
5421 |
"model.layers.42.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
5422 |
"model.layers.42.self_attn.k_proj.bias": "model-00012-of-00015.safetensors",
|
|
|
5559 |
"model.layers.43.block_sparse_moe.experts.7.w3.qweight": "model-00012-of-00015.safetensors",
|
5560 |
"model.layers.43.block_sparse_moe.experts.7.w3.qzeros": "model-00012-of-00015.safetensors",
|
5561 |
"model.layers.43.block_sparse_moe.experts.7.w3.scales": "model-00012-of-00015.safetensors",
|
5562 |
+
"model.layers.43.block_sparse_moe.gate.weight": "model-00012-of-00015.safetensors",
|
|
|
|
|
|
|
5563 |
"model.layers.43.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
5564 |
"model.layers.43.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
5565 |
"model.layers.43.self_attn.k_proj.bias": "model-00012-of-00015.safetensors",
|
|
|
5702 |
"model.layers.44.block_sparse_moe.experts.7.w3.qweight": "model-00012-of-00015.safetensors",
|
5703 |
"model.layers.44.block_sparse_moe.experts.7.w3.qzeros": "model-00012-of-00015.safetensors",
|
5704 |
"model.layers.44.block_sparse_moe.experts.7.w3.scales": "model-00012-of-00015.safetensors",
|
5705 |
+
"model.layers.44.block_sparse_moe.gate.weight": "model-00012-of-00015.safetensors",
|
|
|
|
|
|
|
5706 |
"model.layers.44.input_layernorm.weight": "model-00012-of-00015.safetensors",
|
5707 |
"model.layers.44.post_attention_layernorm.weight": "model-00012-of-00015.safetensors",
|
5708 |
"model.layers.44.self_attn.k_proj.bias": "model-00012-of-00015.safetensors",
|
|
|
5845 |
"model.layers.45.block_sparse_moe.experts.7.w3.qweight": "model-00013-of-00015.safetensors",
|
5846 |
"model.layers.45.block_sparse_moe.experts.7.w3.qzeros": "model-00013-of-00015.safetensors",
|
5847 |
"model.layers.45.block_sparse_moe.experts.7.w3.scales": "model-00013-of-00015.safetensors",
|
5848 |
+
"model.layers.45.block_sparse_moe.gate.weight": "model-00012-of-00015.safetensors",
|
|
|
|
|
|
|
5849 |
"model.layers.45.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
5850 |
"model.layers.45.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
5851 |
"model.layers.45.self_attn.k_proj.bias": "model-00012-of-00015.safetensors",
|
|
|
5988 |
"model.layers.46.block_sparse_moe.experts.7.w3.qweight": "model-00013-of-00015.safetensors",
|
5989 |
"model.layers.46.block_sparse_moe.experts.7.w3.qzeros": "model-00013-of-00015.safetensors",
|
5990 |
"model.layers.46.block_sparse_moe.experts.7.w3.scales": "model-00013-of-00015.safetensors",
|
5991 |
+
"model.layers.46.block_sparse_moe.gate.weight": "model-00013-of-00015.safetensors",
|
|
|
|
|
|
|
5992 |
"model.layers.46.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
5993 |
"model.layers.46.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
5994 |
"model.layers.46.self_attn.k_proj.bias": "model-00013-of-00015.safetensors",
|
|
|
6131 |
"model.layers.47.block_sparse_moe.experts.7.w3.qweight": "model-00013-of-00015.safetensors",
|
6132 |
"model.layers.47.block_sparse_moe.experts.7.w3.qzeros": "model-00013-of-00015.safetensors",
|
6133 |
"model.layers.47.block_sparse_moe.experts.7.w3.scales": "model-00013-of-00015.safetensors",
|
6134 |
+
"model.layers.47.block_sparse_moe.gate.weight": "model-00013-of-00015.safetensors",
|
|
|
|
|
|
|
|
|
6135 |
"model.layers.47.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
6136 |
"model.layers.47.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
6137 |
"model.layers.47.self_attn.k_proj.bias": "model-00013-of-00015.safetensors",
|
|
|
6274 |
"model.layers.48.block_sparse_moe.experts.7.w3.qweight": "model-00013-of-00015.safetensors",
|
6275 |
"model.layers.48.block_sparse_moe.experts.7.w3.qzeros": "model-00013-of-00015.safetensors",
|
6276 |
"model.layers.48.block_sparse_moe.experts.7.w3.scales": "model-00013-of-00015.safetensors",
|
6277 |
+
"model.layers.48.block_sparse_moe.gate.weight": "model-00013-of-00015.safetensors",
|
|
|
|
|
|
|
6278 |
"model.layers.48.input_layernorm.weight": "model-00013-of-00015.safetensors",
|
6279 |
"model.layers.48.post_attention_layernorm.weight": "model-00013-of-00015.safetensors",
|
6280 |
"model.layers.48.self_attn.k_proj.bias": "model-00013-of-00015.safetensors",
|
|
|
6417 |
"model.layers.49.block_sparse_moe.experts.7.w3.qweight": "model-00014-of-00015.safetensors",
|
6418 |
"model.layers.49.block_sparse_moe.experts.7.w3.qzeros": "model-00014-of-00015.safetensors",
|
6419 |
"model.layers.49.block_sparse_moe.experts.7.w3.scales": "model-00014-of-00015.safetensors",
|
6420 |
+
"model.layers.49.block_sparse_moe.gate.weight": "model-00013-of-00015.safetensors",
|
|
|
|
|
|
|
6421 |
"model.layers.49.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
6422 |
"model.layers.49.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
6423 |
"model.layers.49.self_attn.k_proj.bias": "model-00013-of-00015.safetensors",
|
|
|
6560 |
"model.layers.5.block_sparse_moe.experts.7.w3.qweight": "model-00002-of-00015.safetensors",
|
6561 |
"model.layers.5.block_sparse_moe.experts.7.w3.qzeros": "model-00002-of-00015.safetensors",
|
6562 |
"model.layers.5.block_sparse_moe.experts.7.w3.scales": "model-00002-of-00015.safetensors",
|
6563 |
+
"model.layers.5.block_sparse_moe.gate.weight": "model-00002-of-00015.safetensors",
|
|
|
|
|
|
|
6564 |
"model.layers.5.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
6565 |
"model.layers.5.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
6566 |
"model.layers.5.self_attn.k_proj.bias": "model-00002-of-00015.safetensors",
|
|
|
6703 |
"model.layers.50.block_sparse_moe.experts.7.w3.qweight": "model-00014-of-00015.safetensors",
|
6704 |
"model.layers.50.block_sparse_moe.experts.7.w3.qzeros": "model-00014-of-00015.safetensors",
|
6705 |
"model.layers.50.block_sparse_moe.experts.7.w3.scales": "model-00014-of-00015.safetensors",
|
6706 |
+
"model.layers.50.block_sparse_moe.gate.weight": "model-00014-of-00015.safetensors",
|
|
|
|
|
|
|
6707 |
"model.layers.50.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
6708 |
"model.layers.50.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
6709 |
"model.layers.50.self_attn.k_proj.bias": "model-00014-of-00015.safetensors",
|
|
|
6846 |
"model.layers.51.block_sparse_moe.experts.7.w3.qweight": "model-00014-of-00015.safetensors",
|
6847 |
"model.layers.51.block_sparse_moe.experts.7.w3.qzeros": "model-00014-of-00015.safetensors",
|
6848 |
"model.layers.51.block_sparse_moe.experts.7.w3.scales": "model-00014-of-00015.safetensors",
|
6849 |
+
"model.layers.51.block_sparse_moe.gate.weight": "model-00014-of-00015.safetensors",
|
|
|
|
|
|
|
6850 |
"model.layers.51.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
6851 |
"model.layers.51.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
6852 |
"model.layers.51.self_attn.k_proj.bias": "model-00014-of-00015.safetensors",
|
|
|
6989 |
"model.layers.52.block_sparse_moe.experts.7.w3.qweight": "model-00014-of-00015.safetensors",
|
6990 |
"model.layers.52.block_sparse_moe.experts.7.w3.qzeros": "model-00014-of-00015.safetensors",
|
6991 |
"model.layers.52.block_sparse_moe.experts.7.w3.scales": "model-00014-of-00015.safetensors",
|
6992 |
+
"model.layers.52.block_sparse_moe.gate.weight": "model-00014-of-00015.safetensors",
|
|
|
|
|
|
|
6993 |
"model.layers.52.input_layernorm.weight": "model-00014-of-00015.safetensors",
|
6994 |
"model.layers.52.post_attention_layernorm.weight": "model-00014-of-00015.safetensors",
|
6995 |
"model.layers.52.self_attn.k_proj.bias": "model-00014-of-00015.safetensors",
|
|
|
7132 |
"model.layers.53.block_sparse_moe.experts.7.w3.qweight": "model-00015-of-00015.safetensors",
|
7133 |
"model.layers.53.block_sparse_moe.experts.7.w3.qzeros": "model-00015-of-00015.safetensors",
|
7134 |
"model.layers.53.block_sparse_moe.experts.7.w3.scales": "model-00015-of-00015.safetensors",
|
7135 |
+
"model.layers.53.block_sparse_moe.gate.weight": "model-00014-of-00015.safetensors",
|
|
|
|
|
|
|
7136 |
"model.layers.53.input_layernorm.weight": "model-00015-of-00015.safetensors",
|
7137 |
"model.layers.53.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
|
7138 |
"model.layers.53.self_attn.k_proj.bias": "model-00014-of-00015.safetensors",
|
|
|
7275 |
"model.layers.54.block_sparse_moe.experts.7.w3.qweight": "model-00015-of-00015.safetensors",
|
7276 |
"model.layers.54.block_sparse_moe.experts.7.w3.qzeros": "model-00015-of-00015.safetensors",
|
7277 |
"model.layers.54.block_sparse_moe.experts.7.w3.scales": "model-00015-of-00015.safetensors",
|
7278 |
+
"model.layers.54.block_sparse_moe.gate.weight": "model-00015-of-00015.safetensors",
|
|
|
|
|
|
|
7279 |
"model.layers.54.input_layernorm.weight": "model-00015-of-00015.safetensors",
|
7280 |
"model.layers.54.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
|
7281 |
"model.layers.54.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
|
|
|
7418 |
"model.layers.55.block_sparse_moe.experts.7.w3.qweight": "model-00015-of-00015.safetensors",
|
7419 |
"model.layers.55.block_sparse_moe.experts.7.w3.qzeros": "model-00015-of-00015.safetensors",
|
7420 |
"model.layers.55.block_sparse_moe.experts.7.w3.scales": "model-00015-of-00015.safetensors",
|
7421 |
+
"model.layers.55.block_sparse_moe.gate.weight": "model-00015-of-00015.safetensors",
|
|
|
|
|
|
|
|
|
7422 |
"model.layers.55.input_layernorm.weight": "model-00015-of-00015.safetensors",
|
7423 |
"model.layers.55.post_attention_layernorm.weight": "model-00015-of-00015.safetensors",
|
7424 |
"model.layers.55.self_attn.k_proj.bias": "model-00015-of-00015.safetensors",
|
|
|
7561 |
"model.layers.6.block_sparse_moe.experts.7.w3.qweight": "model-00002-of-00015.safetensors",
|
7562 |
"model.layers.6.block_sparse_moe.experts.7.w3.qzeros": "model-00002-of-00015.safetensors",
|
7563 |
"model.layers.6.block_sparse_moe.experts.7.w3.scales": "model-00002-of-00015.safetensors",
|
7564 |
+
"model.layers.6.block_sparse_moe.gate.weight": "model-00002-of-00015.safetensors",
|
|
|
|
|
|
|
7565 |
"model.layers.6.input_layernorm.weight": "model-00002-of-00015.safetensors",
|
7566 |
"model.layers.6.post_attention_layernorm.weight": "model-00002-of-00015.safetensors",
|
7567 |
"model.layers.6.self_attn.k_proj.bias": "model-00002-of-00015.safetensors",
|
|
|
7704 |
"model.layers.7.block_sparse_moe.experts.7.w3.qweight": "model-00003-of-00015.safetensors",
|
7705 |
"model.layers.7.block_sparse_moe.experts.7.w3.qzeros": "model-00003-of-00015.safetensors",
|
7706 |
"model.layers.7.block_sparse_moe.experts.7.w3.scales": "model-00003-of-00015.safetensors",
|
7707 |
+
"model.layers.7.block_sparse_moe.gate.weight": "model-00002-of-00015.safetensors",
|
|
|
|
|
|
|
|
|
7708 |
"model.layers.7.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
7709 |
"model.layers.7.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
7710 |
"model.layers.7.self_attn.k_proj.bias": "model-00002-of-00015.safetensors",
|
|
|
7847 |
"model.layers.8.block_sparse_moe.experts.7.w3.qweight": "model-00003-of-00015.safetensors",
|
7848 |
"model.layers.8.block_sparse_moe.experts.7.w3.qzeros": "model-00003-of-00015.safetensors",
|
7849 |
"model.layers.8.block_sparse_moe.experts.7.w3.scales": "model-00003-of-00015.safetensors",
|
7850 |
+
"model.layers.8.block_sparse_moe.gate.weight": "model-00003-of-00015.safetensors",
|
|
|
|
|
|
|
7851 |
"model.layers.8.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
7852 |
"model.layers.8.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
7853 |
"model.layers.8.self_attn.k_proj.bias": "model-00003-of-00015.safetensors",
|
|
|
7990 |
"model.layers.9.block_sparse_moe.experts.7.w3.qweight": "model-00003-of-00015.safetensors",
|
7991 |
"model.layers.9.block_sparse_moe.experts.7.w3.qzeros": "model-00003-of-00015.safetensors",
|
7992 |
"model.layers.9.block_sparse_moe.experts.7.w3.scales": "model-00003-of-00015.safetensors",
|
7993 |
+
"model.layers.9.block_sparse_moe.gate.weight": "model-00003-of-00015.safetensors",
|
|
|
|
|
|
|
7994 |
"model.layers.9.input_layernorm.weight": "model-00003-of-00015.safetensors",
|
7995 |
"model.layers.9.post_attention_layernorm.weight": "model-00003-of-00015.safetensors",
|
7996 |
"model.layers.9.self_attn.k_proj.bias": "model-00003-of-00015.safetensors",
|