numen-tech's picture
Add weights
744bd08
{
"metadata": {
"ParamSize": 315,
"ParamBytes": 1348381512.0,
"BitsPerParam": 4.076776538691619
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 294914304,
"records": [
{
"name": "model.embed_tokens.q_weight",
"shape": [
256002,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 294914304,
"byteOffset": 0
}
],
"md5sum": "2ce726fb5bdd8b471216d9e94d1ee166"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "3a8dc7328882421eeacfc9af13d73c8f"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 28150344,
"records": [
{
"name": "model.embed_tokens.q_scale",
"shape": [
256002,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216072,
"byteOffset": 0
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 9216072
},
{
"name": "model.layers.0.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 9220680
},
{
"name": "model.layers.0.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 19837512
},
{
"name": "model.layers.0.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 20169288
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 20832840
},
{
"name": "model.layers.0.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 20837448
},
{
"name": "model.layers.0.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 20842056
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 20846664
},
{
"name": "model.layers.0.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 25565256
},
{
"name": "model.layers.0.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 25712712
},
{
"name": "model.layers.0.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 28072008
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 28145736
}
],
"md5sum": "b3dd7a4070ba7842fadb90fd2d1fdfcf"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 32859648,
"records": [
{
"name": "model.layers.1.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 10616832
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 10948608
},
{
"name": "model.layers.1.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32182272
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32845824
},
{
"name": "model.layers.1.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32850432
},
{
"name": "model.layers.1.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32855040
}
],
"md5sum": "dcda6dd2caae5969f9a5e7c91b9d7c6b"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "a533c11726f297196d52a9bc585de255"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 26233344,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.1.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.1.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.1.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
},
{
"name": "model.layers.10.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 7303680
},
{
"name": "model.layers.10.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 17920512
},
{
"name": "model.layers.10.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18252288
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18915840
},
{
"name": "model.layers.10.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18920448
},
{
"name": "model.layers.10.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18925056
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18929664
},
{
"name": "model.layers.10.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 23648256
},
{
"name": "model.layers.10.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23795712
},
{
"name": "model.layers.10.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 26155008
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 26228736
}
],
"md5sum": "7d1748cf5773dd58b4a97fe330d3da5b"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 32859648,
"records": [
{
"name": "model.layers.11.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 10616832
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 10948608
},
{
"name": "model.layers.11.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32182272
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32845824
},
{
"name": "model.layers.11.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32850432
},
{
"name": "model.layers.11.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32855040
}
],
"md5sum": "51f33f832cbc5df8d2a2ed18ffb9a49e"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "4233ddc538ff7757f51816f0a96bddec"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 26233344,
"records": [
{
"name": "model.layers.11.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.11.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.11.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.11.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
},
{
"name": "model.layers.12.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 7303680
},
{
"name": "model.layers.12.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 17920512
},
{
"name": "model.layers.12.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18252288
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18915840
},
{
"name": "model.layers.12.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18920448
},
{
"name": "model.layers.12.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18925056
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18929664
},
{
"name": "model.layers.12.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 23648256
},
{
"name": "model.layers.12.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23795712
},
{
"name": "model.layers.12.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 26155008
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 26228736
}
],
"md5sum": "55a32852ce120de29afd69f3f05f92ba"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 32859648,
"records": [
{
"name": "model.layers.13.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 10616832
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 10948608
},
{
"name": "model.layers.13.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32182272
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32845824
},
{
"name": "model.layers.13.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32850432
},
{
"name": "model.layers.13.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32855040
}
],
"md5sum": "f4e230e02ca84a5683448376b8cd2e41"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "7a2f8438a4bbc93d5169c6dee716119e"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 26233344,
"records": [
{
"name": "model.layers.13.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.13.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.13.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.13.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
},
{
"name": "model.layers.14.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 7303680
},
{
"name": "model.layers.14.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 17920512
},
{
"name": "model.layers.14.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18252288
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18915840
},
{
"name": "model.layers.14.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18920448
},
{
"name": "model.layers.14.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18925056
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18929664
},
{
"name": "model.layers.14.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 23648256
},
{
"name": "model.layers.14.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23795712
},
{
"name": "model.layers.14.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 26155008
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 26228736
}
],
"md5sum": "6fd7bdf0f98c0d2f8ddb22d020f19c46"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 32859648,
"records": [
{
"name": "model.layers.15.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 10616832
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 10948608
},
{
"name": "model.layers.15.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32182272
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32845824
},
{
"name": "model.layers.15.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32850432
},
{
"name": "model.layers.15.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32855040
}
],
"md5sum": "057b8f41e65e197403461f4fbda7b10a"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "05d3ff23cb789d738e5e3fd162324f06"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 26233344,
"records": [
{
"name": "model.layers.15.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.15.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.15.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.15.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
},
{
"name": "model.layers.16.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 7303680
},
{
"name": "model.layers.16.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 17920512
},
{
"name": "model.layers.16.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18252288
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18915840
},
{
"name": "model.layers.16.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18920448
},
{
"name": "model.layers.16.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18925056
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18929664
},
{
"name": "model.layers.16.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 23648256
},
{
"name": "model.layers.16.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23795712
},
{
"name": "model.layers.16.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 26155008
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 26228736
}
],
"md5sum": "4f59579a703aabd2c40cc4d37e54f00f"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 32859648,
"records": [
{
"name": "model.layers.17.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 10616832
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 10948608
},
{
"name": "model.layers.17.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32182272
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32845824
},
{
"name": "model.layers.17.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32850432
},
{
"name": "model.layers.17.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32855040
}
],
"md5sum": "c4ecef56294d704740f5c6bff5d547af"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "f5805e15aa7badc40c826ada16901535"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 26233344,
"records": [
{
"name": "model.layers.17.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.17.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.17.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.17.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
},
{
"name": "model.layers.18.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 7303680
},
{
"name": "model.layers.18.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 17920512
},
{
"name": "model.layers.18.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18252288
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18915840
},
{
"name": "model.layers.18.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18920448
},
{
"name": "model.layers.18.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18925056
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18929664
},
{
"name": "model.layers.18.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 23648256
},
{
"name": "model.layers.18.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23795712
},
{
"name": "model.layers.18.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 26155008
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 26228736
}
],
"md5sum": "2cc3f2edf597c7c22e88aeff85267b43"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 32859648,
"records": [
{
"name": "model.layers.19.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 10616832
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 10948608
},
{
"name": "model.layers.19.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32182272
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32845824
},
{
"name": "model.layers.19.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32850432
},
{
"name": "model.layers.19.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32855040
}
],
"md5sum": "575b85ee2a941727b5f82a882d0f5bf1"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "91ea0963992a1a30fac6b72a34d1d27c"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 26233344,
"records": [
{
"name": "model.layers.19.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.19.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.19.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.19.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
},
{
"name": "model.layers.2.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 7303680
},
{
"name": "model.layers.2.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 17920512
},
{
"name": "model.layers.2.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18252288
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18915840
},
{
"name": "model.layers.2.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18920448
},
{
"name": "model.layers.2.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18925056
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18929664
},
{
"name": "model.layers.2.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 23648256
},
{
"name": "model.layers.2.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23795712
},
{
"name": "model.layers.2.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 26155008
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 26228736
}
],
"md5sum": "b23388b0abec2b93501ad839fd01e7a3"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 32859648,
"records": [
{
"name": "model.layers.20.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 0
},
{
"name": "model.layers.20.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 10616832
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 10948608
},
{
"name": "model.layers.20.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32182272
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32845824
},
{
"name": "model.layers.20.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32850432
},
{
"name": "model.layers.20.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32855040
}
],
"md5sum": "9aa93f6e0bcc2a31533f2b83a1a95549"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "c29e544331322eea73bb5976c484f2db"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 26233344,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.20.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.20.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
},
{
"name": "model.layers.21.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 7303680
},
{
"name": "model.layers.21.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 17920512
},
{
"name": "model.layers.21.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18252288
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18915840
},
{
"name": "model.layers.21.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18920448
},
{
"name": "model.layers.21.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18925056
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18929664
},
{
"name": "model.layers.21.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 23648256
},
{
"name": "model.layers.21.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23795712
},
{
"name": "model.layers.21.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 26155008
},
{
"name": "model.layers.22.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 26228736
}
],
"md5sum": "18630a0a198b23074f101cca2586bfaa"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 32859648,
"records": [
{
"name": "model.layers.22.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 0
},
{
"name": "model.layers.22.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 10616832
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 10948608
},
{
"name": "model.layers.22.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32182272
},
{
"name": "model.layers.22.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32845824
},
{
"name": "model.layers.22.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32850432
},
{
"name": "model.layers.22.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32855040
}
],
"md5sum": "d6e209901d08415d5a2cb83592026917"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.23.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "b5105f0cdec9574d7515d2771dc55583"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 31099392,
"records": [
{
"name": "model.layers.22.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.22.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.22.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.22.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.layers.23.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
},
{
"name": "model.layers.23.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 7303680
},
{
"name": "model.layers.23.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 17920512
},
{
"name": "model.layers.23.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18252288
},
{
"name": "model.layers.23.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18915840
},
{
"name": "model.layers.23.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18920448
},
{
"name": "model.layers.23.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18925056
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18929664
},
{
"name": "model.layers.23.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 23648256
},
{
"name": "model.layers.23.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23795712
},
{
"name": "model.layers.23.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 26155008
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 26228736
},
{
"name": "model.layers.24.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 30947328
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 31094784
}
],
"md5sum": "3b98055de8ef4d2725ccfeb028742143"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 32859648,
"records": [
{
"name": "model.layers.3.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 10616832
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 10948608
},
{
"name": "model.layers.3.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32182272
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32845824
},
{
"name": "model.layers.3.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32850432
},
{
"name": "model.layers.3.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32855040
}
],
"md5sum": "07064d842c054dbcbc6dea4178348a9c"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "5cf1189cf226b7d0f65c4df63ecf7200"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 26233344,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.3.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.3.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.3.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
},
{
"name": "model.layers.4.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 7303680
},
{
"name": "model.layers.4.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 17920512
},
{
"name": "model.layers.4.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18252288
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18915840
},
{
"name": "model.layers.4.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18920448
},
{
"name": "model.layers.4.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18925056
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18929664
},
{
"name": "model.layers.4.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 23648256
},
{
"name": "model.layers.4.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23795712
},
{
"name": "model.layers.4.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 26155008
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 26228736
}
],
"md5sum": "459c8aee5726bc193f0e491531d58beb"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 32859648,
"records": [
{
"name": "model.layers.5.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 10616832
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 10948608
},
{
"name": "model.layers.5.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32182272
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32845824
},
{
"name": "model.layers.5.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32850432
},
{
"name": "model.layers.5.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32855040
}
],
"md5sum": "f6c1f9889747d6d59dd0839669341348"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "6b4f82cc47f7db7708a68fd45e925676"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 26233344,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.5.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.5.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.5.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
},
{
"name": "model.layers.6.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 7303680
},
{
"name": "model.layers.6.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 17920512
},
{
"name": "model.layers.6.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18252288
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18915840
},
{
"name": "model.layers.6.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18920448
},
{
"name": "model.layers.6.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18925056
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18929664
},
{
"name": "model.layers.6.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 23648256
},
{
"name": "model.layers.6.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23795712
},
{
"name": "model.layers.6.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 26155008
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 26228736
}
],
"md5sum": "35e4ccd4fb889dfee97fde8bc0842b1a"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 32859648,
"records": [
{
"name": "model.layers.7.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 10616832
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 10948608
},
{
"name": "model.layers.7.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32182272
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32845824
},
{
"name": "model.layers.7.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32850432
},
{
"name": "model.layers.7.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32855040
}
],
"md5sum": "773effcddb960d7768765efce9d149c6"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "85b8cbdee48256f9849e8a3c0e733a30"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 26233344,
"records": [
{
"name": "model.layers.7.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.7.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.7.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.7.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
},
{
"name": "model.layers.8.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 7303680
},
{
"name": "model.layers.8.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 17920512
},
{
"name": "model.layers.8.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18252288
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18915840
},
{
"name": "model.layers.8.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18920448
},
{
"name": "model.layers.8.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18925056
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 18929664
},
{
"name": "model.layers.8.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 23648256
},
{
"name": "model.layers.8.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 23795712
},
{
"name": "model.layers.8.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 26155008
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 26228736
}
],
"md5sum": "c8df07ee8a86718c69a315afea96ee34"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 32859648,
"records": [
{
"name": "model.layers.9.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 10616832
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 10948608
},
{
"name": "model.layers.9.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32182272
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32845824
},
{
"name": "model.layers.9.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32850432
},
{
"name": "model.layers.9.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32855040
}
],
"md5sum": "797ad42c82301b7d8c903b6c035c909a"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.24.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "0b614da46f9b3e79b371123a040b1dc8"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 21233664,
"records": [
{
"name": "model.layers.25.mlp.gate_up_proj.q_weight",
"shape": [
18432,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 21233664,
"byteOffset": 0
}
],
"md5sum": "4666d57b82be67735fafe78f3bd012e7"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 32993280,
"records": [
{
"name": "model.layers.9.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.9.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.9.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.9.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.layers.24.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
},
{
"name": "model.layers.24.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 7303680
},
{
"name": "model.layers.24.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 17920512
},
{
"name": "model.layers.24.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 18252288
},
{
"name": "model.layers.24.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18915840
},
{
"name": "model.layers.24.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18920448
},
{
"name": "model.layers.24.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 18925056
},
{
"name": "model.layers.24.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 18929664
},
{
"name": "model.layers.24.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 21288960
},
{
"name": "model.layers.25.input_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 21362688
},
{
"name": "model.layers.25.mlp.down_proj.q_weight",
"shape": [
2304,
1152
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 10616832,
"byteOffset": 21367296
},
{
"name": "model.layers.25.mlp.down_proj.q_scale",
"shape": [
2304,
72
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 331776,
"byteOffset": 31984128
},
{
"name": "model.layers.25.mlp.gate_up_proj.q_scale",
"shape": [
18432,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 663552,
"byteOffset": 32315904
},
{
"name": "model.layers.25.post_attention_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32979456
},
{
"name": "model.layers.25.post_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32984064
},
{
"name": "model.layers.25.pre_feedforward_layernorm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 32988672
}
],
"md5sum": "b2a9cd8fd11355137a64136ee681387b"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 7303680,
"records": [
{
"name": "model.layers.25.self_attn.qkv_proj.q_weight",
"shape": [
4096,
288
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 4718592,
"byteOffset": 0
},
{
"name": "model.layers.25.self_attn.qkv_proj.q_scale",
"shape": [
4096,
18
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 147456,
"byteOffset": 4718592
},
{
"name": "model.layers.25.self_attn.o_proj.q_weight",
"shape": [
2304,
256
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 2359296,
"byteOffset": 4866048
},
{
"name": "model.layers.25.self_attn.o_proj.q_scale",
"shape": [
2304,
16
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 73728,
"byteOffset": 7225344
},
{
"name": "model.norm.weight",
"shape": [
2304
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4608,
"byteOffset": 7299072
}
],
"md5sum": "f5e29d90ae0ec94d20515fdbaf87de31"
}
]
}