{ "metadata": { "ParamSize": 405, "ParamBytes": 1692119808.0, "BitsPerParam": 4.500713551972092 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 141419520, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 122760, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141419520, "byteOffset": 0 } ], "md5sum": "673a7768cedf15f588c8982fbb19a2a4" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 141419520, "records": [ { "name": "lm_head.q_weight", "shape": [ 122760, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141419520, "byteOffset": 0 } ], "md5sum": "f905741e89ce94cf94c67ea8dd3065ac" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 17677440, "records": [ { "name": "lm_head.q_scale", "shape": [ 122760, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17677440, "byteOffset": 0 } ], "md5sum": "3877afae3c12668e8b7d093b4d8ad572" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29635200, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 122760, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 17677440, "byteOffset": 0 }, { "name": "model.norm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 17677440 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 17682048 }, { "name": "model.layers.0.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 17686656 }, { "name": "model.layers.0.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 25649280 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 26644608 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 29298816 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 29630592 } ], "md5sum": "085ef960e2f6c8132e3fb444a605e452" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 31357440, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 0 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 13271040 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 14929920 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 21565440 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 22394880 }, { "name": "model.layers.1.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 22399488 }, { "name": "model.layers.1.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 30362112 } ], "md5sum": "022837b6bdaff3659897cfe5d78537c9" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33352704, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 2654208 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2985984 }, { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 2990592 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 16261632 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 17920512 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 24556032 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25385472 }, { "name": "model.layers.2.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 25390080 } ], "md5sum": "c27346f8415db11e1da2c9243c4f21fa" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 26385408, "records": [ { "name": "model.layers.2.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 995328 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 3649536 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 3981312 }, { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 3985920 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 17256960 }, { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 18915840 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 25551360 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 26380800 } ], "md5sum": "5583a2127c66d85de1337f2f09fbe8e5" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33513984, "records": [ { "name": "model.layers.3.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 0 }, { "name": "model.layers.3.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 7962624 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 8957952 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 11612160 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 11943936 }, { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 11948544 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 25219584 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 26878464 } ], "md5sum": "f5413b64b04950c996f6702c22853185" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 27712512, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 0 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 829440 }, { "name": "model.layers.4.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 834048 }, { "name": "model.layers.4.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 8796672 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 9792000 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 12446208 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 12777984 }, { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 12782592 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 26053632 } ], "md5sum": "7b76244f6188c18620eb4727b5ff2ed8" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 32689152, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 0 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 6635520 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 7464960 }, { "name": "model.layers.5.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 7469568 }, { "name": "model.layers.5.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 15432192 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 16427520 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 19081728 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 19413504 }, { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 19418112 } ], "md5sum": "b7d58215dabb597121af7dd961f6f84c" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 21076992, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 0 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 1658880 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 8294400 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9123840 }, { "name": "model.layers.6.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 9128448 }, { "name": "model.layers.6.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 17091072 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 18086400 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 20740608 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 21072384 } ], "md5sum": "d185995620198a7c2ed217942d22a90f" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 31357440, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 0 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 13271040 }, { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 14929920 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 21565440 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 22394880 }, { "name": "model.layers.7.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 22399488 }, { "name": "model.layers.7.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 30362112 } ], "md5sum": "7ae1d3896da91089c113f0cdd5a16c40" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33352704, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 2654208 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2985984 }, { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 2990592 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 16261632 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 17920512 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 24556032 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25385472 }, { "name": "model.layers.8.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 25390080 } ], "md5sum": "48f2f038307f162624ece8751f7a77e1" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 26385408, "records": [ { "name": "model.layers.8.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 995328 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 3649536 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 3981312 }, { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 3985920 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 17256960 }, { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 18915840 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 25551360 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 26380800 } ], "md5sum": "9812e52a470f3b6eb23d46669395e251" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33513984, "records": [ { "name": "model.layers.9.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 7962624 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 8957952 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 11612160 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 11943936 }, { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 11948544 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 25219584 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 26878464 } ], "md5sum": "bccced049950d692c8971a2497170bde" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 27712512, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 0 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 829440 }, { "name": "model.layers.10.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 834048 }, { "name": "model.layers.10.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 8796672 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 9792000 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 12446208 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 12777984 }, { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 12782592 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 26053632 } ], "md5sum": "edcc023bef5daa1ba8b8026abf91dde3" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 32689152, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 6635520 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 7464960 }, { "name": "model.layers.11.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 7469568 }, { "name": "model.layers.11.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 15432192 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 16427520 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 19081728 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 19413504 }, { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 19418112 } ], "md5sum": "e61459ee617321735c15b7b0abb03bad" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 21076992, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 1658880 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 8294400 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9123840 }, { "name": "model.layers.12.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 9128448 }, { "name": "model.layers.12.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 17091072 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 18086400 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 20740608 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 21072384 } ], "md5sum": "fb932d92393c5478da72034a79c3e17f" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 31357440, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 0 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 13271040 }, { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 14929920 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 21565440 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 22394880 }, { "name": "model.layers.13.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 22399488 }, { "name": "model.layers.13.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 30362112 } ], "md5sum": "61c8c8f37ab150eabeebfc1639361b7b" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33352704, "records": [ { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 2654208 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2985984 }, { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 2990592 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 16261632 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 17920512 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 24556032 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25385472 }, { "name": "model.layers.14.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 25390080 } ], "md5sum": "47f24b91b27a5e5a684130e13c690d47" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 26385408, "records": [ { "name": "model.layers.14.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 995328 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 3649536 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 3981312 }, { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 3985920 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 17256960 }, { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 18915840 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 25551360 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 26380800 } ], "md5sum": "cdfa99bbb314239b6247ea6ea7063565" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 33513984, "records": [ { "name": "model.layers.15.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 7962624 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 8957952 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 11612160 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 11943936 }, { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 11948544 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 25219584 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 26878464 } ], "md5sum": "436fba893a097c52533be5b5409522ba" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 27712512, "records": [ { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 0 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 829440 }, { "name": "model.layers.16.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 834048 }, { "name": "model.layers.16.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 8796672 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 9792000 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 12446208 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 12777984 }, { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 12782592 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 26053632 } ], "md5sum": "8512f451b87f7f62fe3b5602ca0b8143" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 32689152, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 6635520 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 7464960 }, { "name": "model.layers.17.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 7469568 }, { "name": "model.layers.17.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 15432192 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 16427520 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 19081728 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 19413504 }, { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 19418112 } ], "md5sum": "8eb937c7723d2dcc0bdfeca4f5ec0064" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 21076992, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 1658880 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 8294400 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9123840 }, { "name": "model.layers.18.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 9128448 }, { "name": "model.layers.18.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 17091072 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 18086400 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 20740608 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 21072384 } ], "md5sum": "d268a1e5e2134681469776369e545ee7" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 31357440, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 0 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 13271040 }, { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 14929920 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 21565440 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 22394880 }, { "name": "model.layers.19.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 22399488 }, { "name": "model.layers.19.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 30362112 } ], "md5sum": "acd08cfd0a3e23c47392323f6ceff52b" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33352704, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 2654208 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2985984 }, { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 2990592 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 16261632 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 17920512 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 24556032 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25385472 }, { "name": "model.layers.20.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 25390080 } ], "md5sum": "4d521c96873595e62ca348825935f929" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 26385408, "records": [ { "name": "model.layers.20.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 995328 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 3649536 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 3981312 }, { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 3985920 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 17256960 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 18915840 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 25551360 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 26380800 } ], "md5sum": "60648647d3c34883231d031a06d8a327" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 33513984, "records": [ { "name": "model.layers.21.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 7962624 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 8957952 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 11612160 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 11943936 }, { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 11948544 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 25219584 }, { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 26878464 } ], "md5sum": "4d1d2bfa37467fdbad72640fd3c4392d" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 27712512, "records": [ { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 829440 }, { "name": "model.layers.22.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 834048 }, { "name": "model.layers.22.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 8796672 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 9792000 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 12446208 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 12777984 }, { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 12782592 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 26053632 } ], "md5sum": "b106bab4f05c51001c5be6939f7936df" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 32689152, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 0 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 6635520 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 7464960 }, { "name": "model.layers.23.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 7469568 }, { "name": "model.layers.23.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 15432192 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 16427520 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 19081728 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 19413504 }, { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 19418112 } ], "md5sum": "8dff849a1597e03193110c587588f28f" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 21076992, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 1658880 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 8294400 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9123840 }, { "name": "model.layers.24.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 9128448 }, { "name": "model.layers.24.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 17091072 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 18086400 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 20740608 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 21072384 } ], "md5sum": "ffe49e22b525b2ca9c422e930863fd6d" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 31357440, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 0 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 13271040 }, { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 14929920 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 21565440 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 22394880 }, { "name": "model.layers.25.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 22399488 }, { "name": "model.layers.25.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 30362112 } ], "md5sum": "819339c3705965b5cc4ffeba9515531d" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33352704, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 2654208 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2985984 }, { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 2990592 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 16261632 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 17920512 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 24556032 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25385472 }, { "name": "model.layers.26.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 25390080 } ], "md5sum": "246505c91eb3c83919d381147143ce42" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 26385408, "records": [ { "name": "model.layers.26.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 995328 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 3649536 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 3981312 }, { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 3985920 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 17256960 }, { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 18915840 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 25551360 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 26380800 } ], "md5sum": "d6bea613808438798ac157eb503ba707" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33513984, "records": [ { "name": "model.layers.27.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 7962624 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 8957952 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 11612160 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 11943936 }, { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 11948544 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 25219584 }, { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 26878464 } ], "md5sum": "6e9eec8ba391133e276a4c26d16ac1f7" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 27712512, "records": [ { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 0 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 829440 }, { "name": "model.layers.28.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 834048 }, { "name": "model.layers.28.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 8796672 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 9792000 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 12446208 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 12777984 }, { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 12782592 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 26053632 } ], "md5sum": "14857aaa301e493b9dc7b95964d8b2f3" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32689152, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 0 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 6635520 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 7464960 }, { "name": "model.layers.29.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 7469568 }, { "name": "model.layers.29.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 15432192 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 16427520 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 19081728 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 19413504 }, { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 19418112 } ], "md5sum": "32a11753fd3352de32a58a034ca84216" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 21076992, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 0 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 1658880 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 8294400 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9123840 }, { "name": "model.layers.30.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 9128448 }, { "name": "model.layers.30.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 17091072 }, { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 18086400 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 20740608 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 21072384 } ], "md5sum": "c5310893e62902b8bcd809641d477066" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 31357440, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 0 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 13271040 }, { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 14929920 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 21565440 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 22394880 }, { "name": "model.layers.31.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 22399488 }, { "name": "model.layers.31.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 30362112 } ], "md5sum": "1978547a205de5b5ed1db9b30317048e" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 33352704, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 2654208 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2985984 }, { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 2990592 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 16261632 }, { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 17920512 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 24556032 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25385472 }, { "name": "model.layers.32.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 25390080 } ], "md5sum": "8083282bca9b01b1a42d92ee6dade96f" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 26385408, "records": [ { "name": "model.layers.32.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 995328 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 3649536 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 3981312 }, { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 3985920 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 17256960 }, { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 18915840 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 25551360 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 26380800 } ], "md5sum": "6c371e43d8e96ca8fa0246002fff3085" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 33513984, "records": [ { "name": "model.layers.33.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 0 }, { "name": "model.layers.33.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 7962624 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 8957952 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 11612160 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 11943936 }, { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 11948544 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 25219584 }, { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 26878464 } ], "md5sum": "f60d1b568010e488f1cb63f2d6be100a" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 27712512, "records": [ { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 0 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 829440 }, { "name": "model.layers.34.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 834048 }, { "name": "model.layers.34.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 8796672 }, { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 9792000 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 12446208 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 12777984 }, { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 12782592 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 26053632 } ], "md5sum": "028cb8eb3d6eebee9e8aaaed2c85a8a5" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 32689152, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 0 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 6635520 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 7464960 }, { "name": "model.layers.35.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 7469568 }, { "name": "model.layers.35.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 15432192 }, { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 16427520 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 19081728 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 19413504 }, { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 19418112 } ], "md5sum": "f692b0bd09b30dd1a32f65b3ec15d6af" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 21076992, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 0 }, { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 1658880 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 8294400 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 9123840 }, { "name": "model.layers.36.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 9128448 }, { "name": "model.layers.36.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 17091072 }, { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 18086400 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 20740608 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 21072384 } ], "md5sum": "8bb3b9a458f2ddbada9d92e17c2ad4db" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 31357440, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 0 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 13271040 }, { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 14929920 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 21565440 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 22394880 }, { "name": "model.layers.37.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 22399488 }, { "name": "model.layers.37.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 30362112 } ], "md5sum": "05e52f40adae99c093fb9eeb4688fa08" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 33352704, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 2654208 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2985984 }, { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 2990592 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 16261632 }, { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 17920512 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 24556032 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25385472 }, { "name": "model.layers.38.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 25390080 } ], "md5sum": "15ed1983becd2473774f24baef7afdc8" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 26385408, "records": [ { "name": "model.layers.38.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 0 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 995328 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 3649536 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 3981312 }, { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 3985920 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 17256960 }, { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 18915840 }, { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 25551360 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 26380800 } ], "md5sum": "5b23b6d46d8ab4ffd4d295ca7d336f09" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33513984, "records": [ { "name": "model.layers.39.self_attn.wqkv_pack.q_weight", "shape": [ 6912, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 7962624, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.wqkv_pack.q_scale", "shape": [ 6912, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 995328, "byteOffset": 7962624 }, { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 2304, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 8957952 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 2304, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 331776, "byteOffset": 11612160 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 11943936 }, { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 11520, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13271040, "byteOffset": 11948544 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 11520, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1658880, "byteOffset": 25219584 }, { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 2304, 720 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 6635520, "byteOffset": 26878464 } ], "md5sum": "daa3f32720eb65cdc349f077aba714d7" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 829440, "records": [ { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 2304, 180 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 829440, "byteOffset": 0 } ], "md5sum": "6a7d2e49d33a040c1fe8533f541dd351" } ] }