{ "metadata": { "ParamSize": 709, "ParamBytes": 16895535104.0, "BitsPerParam": 3.0067237203635373 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "lm_head.q_weight", "shape": [ 640, 152064 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "d38afcd82306594fb1f01a676a89254a" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.63.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "acda2786be9673b2ce87cc57b01b0b3a" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.63.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "286b3c85da12e65f0ee32f05ce67d90a" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 389283840, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 152064, 640 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 389283840, "byteOffset": 0 } ], "md5sum": "81dc4cbe141d73286c525c6e0b4e743f" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fe099c1452569ffed71b77209d807869" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0fa095862c96029d652a6544226ba2fd" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33218560, "records": [ { "name": "lm_head.q_scale", "shape": [ 40, 152064 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12165120, "byteOffset": 0 }, { "name": "model.layers.63.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 12165120 }, { "name": "model.layers.63.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 12175360 }, { "name": "model.layers.63.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 14387200 }, { "name": "model.layers.63.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18810880 }, { "name": "model.norm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 18821120 }, { "name": "model.embed_tokens.q_scale", "shape": [ 152064, 40 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 12165120, "byteOffset": 18831360 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 30996480 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 31006720 } ], "md5sum": "f3e94aee5a0a9351986e810eeb994fbf" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 23371776, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 4423680 }, { "name": "model.layers.0.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 4433920 }, { "name": "model.layers.0.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 4448256 }, { "name": "model.layers.0.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 22798336 } ], "md5sum": "f0bc07957ca3f7a68ff114f776a39015" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4f9958bfcd06555dda3c46be941d84e5" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "7e71867a4770b3d00e5f5a3b91f58abf" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.1.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ce44e1852c6418f78e0fde0a02cc86e5" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.1.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.1.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "63441832b665917822906599b0f95fc7" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "941b2a8252cb52bf66573bee4925f0fa" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f3cb59a00b241c7b23eb48de535b2b6d" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.2.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "cec53c6ad9acea483afcfdced0820174" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.2.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.2.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "1a9a57369e0443d1e0480ab0905b0e9f" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0d23d4ff5e31847925164d915082a354" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.3.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c6e99c735d7311629717ed7fe1a62485" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "18b7c72b941ae836d87325e3d3dce0f9" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.3.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.3.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "210d4410e6d5b824ef1e387341029b69" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a89fc9065e98bea11eeea09d3a308ad4" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 25583616, "records": [ { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 2211840 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 6635520 }, { "name": "model.layers.10.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 6645760 }, { "name": "model.layers.10.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 6660096 }, { "name": "model.layers.10.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 25010176 } ], "md5sum": "cdd101ec02016ec8cf06f783f98c845c" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "daaf26be908e1987efa322ffab03e411" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a92db6cf1bfe65d8f77eb68704265bf1" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.11.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0fb776f9efe40ab67c8ec798720fae93" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.11.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.11.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "e5cf770e64f35dee919bbf07a5400f0f" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a46a68cb997d73d33976ae1fa745bf70" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e216b21f019ce9f475ae041d4222154d" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.12.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "eb133702f79a91e6122cbeefecc60d52" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.12.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.12.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "c9301b3da030f3a87d1495bdf363c9ca" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "741e42ec2b78543024a35b6848fe626f" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.13.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "cb7faa5bef5980a8acf79e358f62c0ef" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "80e295345d0def9ec043929541f7daa0" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.13.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.13.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "ee9d728754a1d0de322c7b5d6469ec97" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "733251277da8b087c2d205c53294aa71" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1a0cbc61c9b08bda14d2be90d99f8a1c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "bc19090272fbdfd9c47bd831a37337db" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 32239616, "records": [ { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 2211840 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 6635520 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 6645760 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 6656000 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 8867840 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13291520 }, { "name": "model.layers.9.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 13301760 }, { "name": "model.layers.9.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 13316096 }, { "name": "model.layers.9.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 31666176 } ], "md5sum": "2629706adb26bba64d78ed0808542722" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "636277e1434de5495a37ea8664cec4c0" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "422e3efe4e824ba307c5726184472900" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "2328983881ef89c1b28b039ad4115415" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.14.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "cc753dc33b62d262bd3588d5de840ca2" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 22992896, "records": [ { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15738880 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15749120 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 15759360 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17971200 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22394880 }, { "name": "model.layers.14.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 22405120 }, { "name": "model.layers.14.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 22419456 } ], "md5sum": "b397fa17be6ef07283bb9ac298c9e5b5" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "154524cd66ae525a1d91d525dc50b6a9" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "847e94bedfe4c912f150eef0391a78a3" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.15.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "647616f7600b3e7ffdbbb1cd067f6c05" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.15.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.15.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "b18ed4c7231280c8f5b52c18b35ea156" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9a5f77a02320c08bbcd6c5d4323eece9" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1ffb30c6cf0f1d0d7847b0d6ebad67fb" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.16.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ccd52a7403ea6b9560c6790b193a5fd8" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.16.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.16.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "a6d26d4cf16fc3bc79b608c3463c008b" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "33dc10f5d1633b6bb082e0ce6ba131c3" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c83345ad81185ef063449ed83fbf9b6a" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.17.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7c24b93f27c00d6159cab396490d5e65" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.17.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.17.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "15c960e33cb79848fff6d97a74740b0e" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "52a5d0f98ccb3cb1743b47fb79232523" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.18.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4610d2765f664fff473eaf583be37aff" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "426bf6b4e6c2631a69d2f8e24c1a1787" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.18.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.18.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "1b7a844c3de9613ac2443a2738a53bea" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "677d62edf49ac06f58b0d174d1e17f3f" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f8134125de28134e70a7d0a9f2880802" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.19.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.19.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.19.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "94e6cbe67bbf012d73ae4de165517256" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1f65b8b6fa62b30f4b8e6dce2c269e66" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "770d8a922a07d50e4b339f9ee62037b2" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.20.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "677fc54a00f9ae9d013ed8e7a3c713d6" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.20.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.20.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "77490fcad3c02b98c8de9e07c8c86428" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "849aa66abaf5f801618b909b39dc17e3" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "11d552441b44fa44728f0fd42e96943c" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.21.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "448e7ace26ca598a089ef057acd19aeb" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.21.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.21.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "482ab1979606f8a43128ad7ea32c51a0" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "9f5b7a80a212d44b165298aa40cfac64" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "adfb6ad05b75eba1801f698e563d295f" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.22.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "dbfe90845e977f2afccb8d49c5fe4933" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.22.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.22.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "bcbba6736037326185d700333a71b9aa" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "516e0c7a334a9dddea8560b123806cb4" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.23.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ae1837eba24c0495b29faecd5c7dab2d" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e06366e02818d75ab98bc30381044420" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.23.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.23.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "9f000010d5f38bcd90af3dc8105c155f" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "06bc6d2cbeaecf69be3369191edfe7fe" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c33b443ba6e80b66426197b9f6550fcf" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.24.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.24.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.24.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "3e526ce2b82bc44a5c4e720f4defd580" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "bd7e7adb49b9f262d912c3058d9c12ed" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9520255eccd73f0c3f43573fcc153282" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.25.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8b18ba064e36238fc01532290a91f320" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.25.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.25.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "ad5ded6836dd1ba17ce5b723f24cd4ec" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "991341efead3379fae44a1cd9a0acc99" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "cbe6060e6d0fe938ad48411981ffe8e3" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.26.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "a8662732d04af5b3bd8602843528fb06" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.26.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.26.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "9995a2b6cb2b75e958552aae2ddb3d50" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "2df0929609b159ef9dd26832579e479e" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "72be7d5648737c7d0be5283d459d87b5" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.27.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "da13409c49de26541251219412c7da4e" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.27.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.27.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "7f0ec36f874ed086a837d41fec1bde86" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "3a18ef04802c8757a446cf950c89bce5" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.28.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "4fb8ab3a1a1d87c82ebfd48595dd267d" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b4071765ae236cea87a86c0455189c45" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.28.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.28.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "ec752b3269c9cf467ab3c05156d7ce1b" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a41f14e0624d4aa5b7c34cd474cfd2e7" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a563b37cbd71f44f06298f03340cab77" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.29.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.29.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.29.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "55c45ec249107bdbfbbad815cb7e8036" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.30.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "8a8f2ddddde399ae9192f18fd4547b6b" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.30.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "95e56040860d3b8dbede826b22cec795" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.30.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "abb787ba6e46a7417bc8221c6af3a7c8" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.30.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.30.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.30.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.30.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.30.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.30.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "4dfb1be23a7187129685c8af34f3ca25" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.31.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "35e37f980205859775df79791147e510" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.31.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "be20f0ee5de20481a0068da91e59f364" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.31.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "eed7b3feb3cb029c1d8262139807f0d3" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.30.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.30.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.31.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.31.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.31.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.31.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.31.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.31.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "99a0476844e6fec86ba0d1e75d124b00" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.32.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d3ba487355af5dc8a00e394a39fdc3f9" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.32.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "852a5d447fdd4d9b867fbc58611f5d9f" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.32.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "337beadf3f290e087fe704e8cef73416" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.31.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.31.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.32.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.32.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.32.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.32.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.32.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.32.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "8f3db0e01f28e6ce65a4fbbb84e743e1" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.33.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "51270eac69cc1548b7f34ffbf3ccedfb" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.33.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3dda21b38ed5f95dcaff807890daed33" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d166efd6e663862326dd0a9df9fd0959" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.32.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.32.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.33.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.33.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.33.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.33.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.33.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "06d973ceb3519d20e81be851298623bb" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4783d3c077f98ce834f1547e5fc72a1a" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "1ec2794648fdb0ca40ade1eac403f756" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.4.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.4.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.4.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "1e38a22979e2eeba9e7d34599e14dfc4" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a92eb6917515ec0905f153e48353e2cd" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ddfb497e4cce5c634c23226c03788991" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.5.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "55f78c97208f04a1b6b97a087526e41f" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.5.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.5.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "41670981366657a70745a28b70fb456f" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "5fcb8b7c316f58a3c3620aec2b0a8604" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "46b4d518c1970e3243605aa274c3e24a" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.6.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "071d970eccffb4c22e48cc4ab86e2388" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.6.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.6.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "12a5096bccc3abe702f02f6ecf175307" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "15dba97d9d6b228fc2d4998083ceed8b" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fa16ebbd0546c571288c3557bc734218" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.7.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "58100f3a7d802201c6e3b3c4681cb754" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.7.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.7.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "965e2e2f755c12ae34dadcae356a1b0a" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.8.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 13516800 }, { "name": "model.layers.8.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 13531136 }, { "name": "model.layers.8.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 31881216 } ], "md5sum": "c5f8480542a3eb2b0c05c1a3d31c8746" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.33.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "28c8f3ae7171167efcf2b7ccc8677a90" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.34.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "fff2b8442e6f3978843ce5d570cb8eb5" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.34.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "73f0f2d74d782631ffcaed993e7e5954" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.34.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "54af7f72f723c8c92dfb71182950e7e5" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 22992896, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.33.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.33.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.33.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15738880 }, { "name": "model.layers.34.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 15749120 }, { "name": "model.layers.34.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 15759360 }, { "name": "model.layers.34.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 17971200 }, { "name": "model.layers.34.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 22394880 }, { "name": "model.layers.34.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 22405120 }, { "name": "model.layers.34.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 22419456 } ], "md5sum": "aabba2f6eec353a44d5448534a5921fd" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.35.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "cf4274cfc9a0b1d38715e7ff874c6604" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.35.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9a55553b13b282e32264b057414ab44f" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.35.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c4f78b8ec84425a3e341b941ef6f24e3" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.34.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.34.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.35.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.35.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.35.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.35.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.35.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.35.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "db83fb0d62a03dbb343b693c9b3fc5cc" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.36.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7a924f3546d1f17ec1d6c02f9b19f555" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.36.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "c5c3594994b05a9226d19d3a340c6ea5" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.36.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3900685cdc3ad30076e80ee20f3e1178" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.35.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.35.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.36.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.36.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.36.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.36.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.36.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.36.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "72ec0af4fc2e254a449bea119ae27628" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.37.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "30e09008fdab2b05d8c6b6fabda1ae09" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.37.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "62da39ef32ff78eb5f7ec064d182fad1" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.37.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "3334dbdfb55ccbba1c79d00187aa99f1" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.36.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.36.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.37.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.37.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.37.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.37.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.37.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.37.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "1b4d6c6929a4cdf0d213403bbe8d9319" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.38.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0a9c2f4fb77d16c8a32030ff09efc40d" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.38.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "94fc019f90a84dfcda7da1ba3ebe7ac6" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.38.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "09c19f09d6002e918ec7670c1202204d" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.37.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.37.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.38.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.38.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.38.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.38.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.38.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.38.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "968cb5915bf1099a23820a133c208ee7" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.39.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "a792204398660658c51bec123b3937e4" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.39.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0f11eb715ad7f44ef7f51cfaace1a7a1" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.38.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.38.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.39.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.39.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.39.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.39.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.39.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.39.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.39.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "71785c0d85526778a0e76e6bab66ae9e" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.40.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "e2cf6a355e7a4cb1402768cf3f7b9d0e" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.40.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "cb6fac174acd8f07f9b2a9b7d398ea55" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.40.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "dd782575907b6a133af98713ea75db47" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.39.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.39.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.40.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.40.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.40.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.40.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.40.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.40.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "507e8440576f3a909c8c165f8011bd19" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.41.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "536b89c2d5c20d3acaf2a525cbd094cf" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.41.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "0e50f98eff90a53d40c70ecb45f0f4d0" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.41.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "6fdb73ff8e45c0476640cf958ae08547" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.40.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.40.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.41.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.41.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.41.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.41.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.41.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.41.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "8b7b8fd2ccb2af5c90be8f7fbe4edc46" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.42.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "7f3e99f133fa2bb1be26519f25b18968" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.42.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "972318bb4df8bf3fa72c7507a12837da" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.42.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "53d009f3c83495ea5edf151df344e46b" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.41.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.41.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.42.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.42.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.42.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.42.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.42.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.42.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "47540bf4282a45b8c44e6503efaace9c" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.43.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "fdd80034ff28e6803426964bf71514f8" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.43.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8a61baf2ac4f95962125aeeba7f5368d" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.43.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f0e866819a68f5d71b076420c4d7f58c" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.42.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.42.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.43.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.43.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.43.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.43.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.43.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.43.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "1428f850c350fb1c568dfef088423f52" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.44.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f06bf939cceb6c89b85fab6325ea4ab3" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.44.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "559a6b462ab083907cf8c1f3248d7806" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.43.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.43.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.44.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.44.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.44.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.44.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.44.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.44.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.44.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "4e09db080c2e85c4875147cf80490457" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.45.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f1acf7809e4e703ef1ee636a9ebb0eb6" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.45.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5d9dee209e221c1e10f9200b0423f78b" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.45.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ea7068f592075fc6727b3b66c89f09e8" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.44.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.44.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.45.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.45.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.45.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.45.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.45.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.45.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "666d036005209240494e2b604efa90a7" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.46.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "372be280d9e1c5c324108b806cddb9e8" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.46.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "973bdadb45e41ccaa1ed6cabde183ff7" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.46.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "5c66412115c3681bf8d62969e6c5e699" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.45.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.45.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.46.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.46.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.46.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.46.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.46.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.46.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "3f8d3e0ce96b9000e9f66de083e91b81" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.47.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f4b813f4c8cfc72839c15d02ed4a5fca" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.47.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "e1c852be2a1eeabd3959d01c4d146dfa" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.47.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "14c8bc706d7a3c2eb9554ab35dcb8e05" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.46.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.46.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.47.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.47.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.47.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.47.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.47.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.47.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "97437794891562a9e27fac9ba64cb189" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.48.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5a302b60e3abe401db0800c4a35ff2da" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.48.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "c381af65da35742403e6049ff712dad9" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.48.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f8f4392b6d65957ec366280f8becbc84" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.47.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.47.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.48.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.48.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.48.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.48.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.48.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.48.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "a77f78b6b02f17147fc220fba6ae5baa" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.49.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "d56c1f8c87732b83ee3e1c441b0ec3bc" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.49.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "ff84fb15ec43b4a93254c2f54a5df08a" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.48.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.48.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.49.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.49.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.49.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.49.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.49.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.49.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.49.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "04b722a684398c5c6414c16f112cd24e" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.50.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "0b52e2d48fcd8c8c41c8b8fdf4309fc0" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.50.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "40105a11b07e999155683683c5e91ae2" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.50.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d9fbc19087fd63bae437f671c74efbc3" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.49.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.49.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.50.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.50.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.50.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.50.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.50.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.50.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "6cdef4a7706eb909be7d4852854b9114" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.51.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4d1f28a1b042075b87e80e545e817e0e" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.51.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9d959d9be2019e88cce53bddc33f37c7" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.51.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "0c2dff11d47a3c2e746037957ed9e92d" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.50.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.50.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.51.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.51.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.51.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.51.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.51.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.51.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "2a42a6da6a6b87131ad9ee3f3be0c35d" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.52.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "de81ef363fe75f1e1b19dab7d46a475b" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.52.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "32421725decaa744404e428da184ae02" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.52.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b10f19a00be3bd70ddafa032bedbf035" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.51.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.51.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.52.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.52.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.52.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.52.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.52.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.52.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "6c19d0a8f8332eecac4c4d42f20dff69" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.53.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5455b9d3808f79efe455a411c5a90b02" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.53.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "d182c89791e87c6999705b4acf08451a" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.53.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "4d145e344372ae0f31915278de9aff0b" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.52.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.52.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.53.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.53.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.53.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.53.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.53.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.53.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "a48fba8ba201df93c897825b4e166880" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.54.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "603a2295996572acfe9eeff16e0b7459" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.54.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "95e5b8ad825803e1134c23945d3272bb" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.53.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.53.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.54.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.54.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.54.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.54.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.54.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.54.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.54.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "aeb4fb074fc60b8fddf4085f3f379a30" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.55.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "6f443ab0c92dc0b3b3c10256ee1f3ef2" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.55.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "d610aa4a27b923ea12f5a5f7d9a3a3fc" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.55.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "b0e15a7716d7dc3c5abfa27dae3fb990" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.54.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.54.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.55.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.55.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.55.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.55.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.55.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.55.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "696e68138430dea53e97ec812956b0cf" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.56.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "920381a9af11a4475c3cd530efd93159" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.56.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "9257d1bd1b581639e8999c38a50a86bc" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.56.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "8d66dc01d60818a69bdcb2138f1ad062" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.55.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.55.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.56.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.56.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.56.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.56.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.56.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.56.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "00aefbd4e0d694dee1392084a3db7e93" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.57.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "20adcdd0d90d4869365298e7e5b4739b" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.57.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "f12a89701e0034d22939970481431fce" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.57.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "1358114cfc69880bf58a8f7c5a647c8c" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.56.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.56.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.57.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.57.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.57.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.57.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.57.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.57.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "5b8a4875d3915626676177fa0662e2b1" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.58.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "eaf16f62426a3e627d021d4179b09dde" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.58.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "ca516e64973d51f8aa906fa3234998f1" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.58.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "b4691fa8eb53e0a7a4d63737bd5e0759" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 32055296, "records": [ { "name": "model.layers.57.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.57.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.58.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 13516800 }, { "name": "model.layers.58.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 17940480 }, { "name": "model.layers.58.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 17954816 }, { "name": "model.layers.58.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 18528256 }, { "name": "model.layers.58.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 31635456 }, { "name": "model.layers.58.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 32045056 } ], "md5sum": "49bc2595398a821a5c87f8c0e8a9886b" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.59.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "f559f543787190f88694578815e95ae3" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.59.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "81bbd5f5433ba23b2f723d75a71a3b77" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 27815936, "records": [ { "name": "model.layers.58.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 0 }, { "name": "model.layers.58.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2211840 }, { "name": "model.layers.59.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 2222080 }, { "name": "model.layers.59.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 2232320 }, { "name": "model.layers.59.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 4444160 }, { "name": "model.layers.59.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 8867840 }, { "name": "model.layers.59.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 8878080 }, { "name": "model.layers.59.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 8892416 }, { "name": "model.layers.59.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 27242496 } ], "md5sum": "15b0ad08bc72ce2cdb4576933f760fcb" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.60.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "619c7a4927768055483d660216481f90" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.60.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "5a42d330cb404b09274f6d7612e52e64" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.60.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "909081c03700a30877ed8a96acc5b4b9" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.59.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.59.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.60.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.60.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.60.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.60.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.60.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.60.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "0314825f48c118c07526868ad9d04305" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.61.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "963e7abaf2434e783299da254cc63a4f" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.61.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a8f23bdd28b53774a6bb793be20538c2" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.61.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "cd6803e7788f32207eadc85086d263ff" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.60.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.60.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.61.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.61.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.61.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.61.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.61.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.61.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "bdfd269e7825dfbf74f0754930007ca3" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 70778880, "records": [ { "name": "model.layers.62.mlp.down_proj.q_weight", "shape": [ 3456, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 70778880, "byteOffset": 0 } ], "md5sum": "1003c01e4207a2a930ee2d04c69a5f6f" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 141557760, "records": [ { "name": "model.layers.62.mlp.gate_up_proj.q_weight", "shape": [ 640, 55296 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 141557760, "byteOffset": 0 } ], "md5sum": "a915b0184d49836553b7875a1d9d394a" }, { "dataPath": "params_shard_242.bin", "format": "raw-shard", "nbytes": 18350080, "records": [ { "name": "model.layers.62.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 0 } ], "md5sum": "7807958fca2e915e986b965150ec3cf0" }, { "dataPath": "params_shard_243.bin", "format": "raw-shard", "nbytes": 20760576, "records": [ { "name": "model.layers.61.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.61.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.62.input_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 13516800 }, { "name": "model.layers.62.mlp.down_proj.q_scale", "shape": [ 216, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2211840, "byteOffset": 13527040 }, { "name": "model.layers.62.mlp.gate_up_proj.q_scale", "shape": [ 40, 55296 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4423680, "byteOffset": 15738880 }, { "name": "model.layers.62.post_attention_layernorm.weight", "shape": [ 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10240, "byteOffset": 20162560 }, { "name": "model.layers.62.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 20172800 }, { "name": "model.layers.62.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 20187136 } ], "md5sum": "b5d7d22cb6ff6c8fc86f429862d8c29a" }, { "dataPath": "params_shard_244.bin", "format": "raw-shard", "nbytes": 32454656, "records": [ { "name": "model.layers.62.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.62.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 }, { "name": "model.layers.63.self_attn.c_attn.bias", "shape": [ 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 14336, "byteOffset": 13516800 }, { "name": "model.layers.63.self_attn.c_attn.q_weight", "shape": [ 640, 7168 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18350080, "byteOffset": 13531136 }, { "name": "model.layers.63.self_attn.c_attn.q_scale", "shape": [ 40, 7168 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 573440, "byteOffset": 31881216 } ], "md5sum": "b0109ad1c92f3a639698dd397daad203" }, { "dataPath": "params_shard_245.bin", "format": "raw-shard", "nbytes": 13516800, "records": [ { "name": "model.layers.63.self_attn.o_proj.q_weight", "shape": [ 640, 5120 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 13107200, "byteOffset": 0 }, { "name": "model.layers.63.self_attn.o_proj.q_scale", "shape": [ 40, 5120 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 409600, "byteOffset": 13107200 } ], "md5sum": "edd7bc24dc83152620952be83d80b49f" } ] }