{ "metadata": { "ParamSize": 315, "ParamBytes": 1470915072.0, "BitsPerParam": 4.501064160740709 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 294912000, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 256000, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 294912000, "byteOffset": 0 } ], "md5sum": "c6823eae835c74b2a722af65fa320115" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 36864000, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 256000, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 36864000, "byteOffset": 0 } ], "md5sum": "d821bebaac91e21fbfdb882416dfab71" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33182208, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 0 }, { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 4608 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10621440 }, { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11948544 } ], "md5sum": "dbba9edcd6ddd849f2fbfb3f6ea4591c" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "f3a69d6d3ad870ab8106cfdc62fec7d0" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33214464, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 }, { "name": "model.layers.0.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2658816 }, { "name": "model.layers.0.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2663424 }, { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2668032 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7386624 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7976448 }, { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10335744 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10630656 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10635264 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 21252096 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 22579200 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25233408 }, { "name": "model.layers.1.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25238016 }, { "name": "model.layers.1.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25242624 }, { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25247232 }, { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29965824 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30555648 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32914944 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33209856 } ], "md5sum": "0de9a80620ae274cba4bed508b0a6d1f" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 33177600, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10616832 }, { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11943936 } ], "md5sum": "5dd1ac9ca0e41958144cd72a41e95840" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "be48ec197105525ed85c5a03abec1f96" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 33214464, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 }, { "name": "model.layers.10.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2658816 }, { "name": "model.layers.10.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2663424 }, { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2668032 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7386624 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7976448 }, { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10335744 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10630656 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10635264 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 21252096 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 22579200 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25233408 }, { "name": "model.layers.11.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25238016 }, { "name": "model.layers.11.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25242624 }, { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25247232 }, { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29965824 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30555648 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32914944 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33209856 } ], "md5sum": "856733c305b4cba5af97f81848694477" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33177600, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10616832 }, { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11943936 } ], "md5sum": "5df281a9b045e85a08aa9597fa9884f0" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "f56846c123474d4a5a031c9ab1064fd3" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33214464, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 }, { "name": "model.layers.12.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2658816 }, { "name": "model.layers.12.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2663424 }, { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2668032 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7386624 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7976448 }, { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10335744 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10630656 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10635264 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 21252096 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 22579200 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25233408 }, { "name": "model.layers.13.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25238016 }, { "name": "model.layers.13.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25242624 }, { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25247232 }, { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29965824 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30555648 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32914944 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33209856 } ], "md5sum": "00bc2100770f744db9bc50e9aaeaa290" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 33177600, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10616832 }, { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11943936 } ], "md5sum": "e7c7a8199eafa92c9dd89644982ac818" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "38c87666d572b838166e8192122ca646" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 33214464, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 }, { "name": "model.layers.14.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2658816 }, { "name": "model.layers.14.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2663424 }, { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2668032 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7386624 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7976448 }, { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10335744 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10630656 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10635264 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 21252096 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 22579200 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25233408 }, { "name": "model.layers.15.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25238016 }, { "name": "model.layers.15.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25242624 }, { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25247232 }, { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29965824 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30555648 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32914944 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33209856 } ], "md5sum": "d6f2c757ac9be8f9c74653758c1035eb" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33177600, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10616832 }, { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11943936 } ], "md5sum": "3385141c5402915d52eae44aa4dbb068" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "789ddeeb1c00d826958c7468638ca377" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33214464, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 }, { "name": "model.layers.16.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2658816 }, { "name": "model.layers.16.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2663424 }, { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2668032 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7386624 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7976448 }, { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10335744 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10630656 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10635264 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 21252096 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 22579200 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25233408 }, { "name": "model.layers.17.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25238016 }, { "name": "model.layers.17.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25242624 }, { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25247232 }, { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29965824 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30555648 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32914944 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33209856 } ], "md5sum": "6f5ee8f451442e580cfb21cfc3981a55" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 33177600, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10616832 }, { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11943936 } ], "md5sum": "528a9d537804da8a1fd390e485d78ea5" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "881d77566b2806044968edca88ea958e" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 33214464, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 }, { "name": "model.layers.18.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2658816 }, { "name": "model.layers.18.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2663424 }, { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2668032 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7386624 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7976448 }, { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10335744 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10630656 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10635264 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 21252096 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 22579200 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25233408 }, { "name": "model.layers.19.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25238016 }, { "name": "model.layers.19.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25242624 }, { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25247232 }, { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29965824 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30555648 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32914944 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33209856 } ], "md5sum": "bcebd45e6a4cd326f5a9b0eb5cb55bff" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 33177600, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10616832 }, { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11943936 } ], "md5sum": "d40ca0038d48b3f94c2867ffa5552d66" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "df80441c58aeb6a13583c36a6c0411a4" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33214464, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 }, { "name": "model.layers.2.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2658816 }, { "name": "model.layers.2.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2663424 }, { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2668032 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7386624 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7976448 }, { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10335744 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10630656 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10635264 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 21252096 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 22579200 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25233408 }, { "name": "model.layers.20.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25238016 }, { "name": "model.layers.20.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25242624 }, { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25247232 }, { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29965824 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30555648 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32914944 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33209856 } ], "md5sum": "8b68a84ee1d4e4f549b867dbbfab65b4" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 33177600, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10616832 }, { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11943936 } ], "md5sum": "62222744274554879d1e290d9af27e17" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "5719a3a5e8302c6d769e42fd8c03f76b" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 33214464, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 }, { "name": "model.layers.21.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2658816 }, { "name": "model.layers.21.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2663424 }, { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2668032 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7386624 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7976448 }, { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10335744 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10630656 }, { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10635264 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 21252096 }, { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 22579200 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25233408 }, { "name": "model.layers.22.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25238016 }, { "name": "model.layers.22.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25242624 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25247232 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29965824 }, { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30555648 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32914944 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33209856 } ], "md5sum": "7f37506dff7002155d33a5ad1cf9f2a1" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 33177600, "records": [ { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10616832 }, { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11943936 } ], "md5sum": "90c72bad2e2557e987d0539b124a426b" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 31864320, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 }, { "name": "model.layers.23.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2658816 }, { "name": "model.layers.23.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2663424 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2668032 }, { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7386624 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7976448 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10335744 }, { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 10630656 } ], "md5sum": "2feaa8b66b1684144f949ad386c2135a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "37c333a74ce91060c71c1363fe3c861b" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33200640, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2654208 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7372800 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7962624 }, { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10321920 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10616832 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10621440 }, { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 21238272 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 22565376 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25219584 }, { "name": "model.layers.3.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25224192 }, { "name": "model.layers.3.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25228800 }, { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25233408 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29952000 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30541824 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32901120 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33196032 } ], "md5sum": "671bdf7940ca33888c8f40cb55f461b9" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 33177600, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10616832 }, { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11943936 } ], "md5sum": "009cc86dafeda3f06461616d1c02f302" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "3b87d6f18d4f2d5817f5bb32ad8779f7" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 33214464, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 }, { "name": "model.layers.4.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2658816 }, { "name": "model.layers.4.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2663424 }, { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2668032 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7386624 }, { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7976448 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10335744 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10630656 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10635264 }, { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 21252096 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 22579200 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25233408 }, { "name": "model.layers.5.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25238016 }, { "name": "model.layers.5.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25242624 }, { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25247232 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29965824 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30555648 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32914944 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33209856 } ], "md5sum": "d0105a9efdc42f8bf10f6d1ed74b3b19" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33177600, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10616832 }, { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11943936 } ], "md5sum": "b8b55899d084f807bf2b938bf4c2e75a" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "5667d47c0154a8c9107a0bfa61af764d" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 33214464, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 }, { "name": "model.layers.6.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2658816 }, { "name": "model.layers.6.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2663424 }, { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2668032 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7386624 }, { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7976448 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10335744 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10630656 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10635264 }, { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 21252096 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 22579200 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25233408 }, { "name": "model.layers.7.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25238016 }, { "name": "model.layers.7.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25242624 }, { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25247232 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29965824 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30555648 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32914944 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33209856 } ], "md5sum": "388eb2f83712d89e4f09c33c459b227c" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 33177600, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10616832 }, { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 11943936 } ], "md5sum": "17ff8531301f3fa26e4f759e2e97d036" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "649cf3a5f0834a5abde81a36b0d19429" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 33214464, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 0 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 }, { "name": "model.layers.8.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2658816 }, { "name": "model.layers.8.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2663424 }, { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 2668032 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 7386624 }, { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 7976448 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 10335744 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 10630656 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 10635264 }, { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 21252096 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 22579200 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25233408 }, { "name": "model.layers.9.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25238016 }, { "name": "model.layers.9.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 25242624 }, { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 25247232 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 29965824 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 30555648 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 32914944 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 33209856 } ], "md5sum": "f5ebb580e190069ba02c13c1e70cbd15" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 21233664, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 18432, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 21233664, "byteOffset": 0 } ], "md5sum": "c68ce2d0b25c773b1f0db4dc0ca7dff5" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 31882752, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 0 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 10616832 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 11943936 }, { "name": "model.layers.24.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 11948544 }, { "name": "model.layers.24.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 11953152 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 11957760 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 2304, 1152 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 10616832, "byteOffset": 11962368 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 2304, 288 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1327104, "byteOffset": 22579200 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 18432, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 2654208, "byteOffset": 23906304 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 26560512 }, { "name": "model.layers.25.post_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 26565120 }, { "name": "model.layers.25.pre_feedforward_layernorm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 26569728 }, { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 4096, 288 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 4718592, "byteOffset": 26574336 }, { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 4096, 72 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 589824, "byteOffset": 31292928 } ], "md5sum": "6410635bd6252c5b5e4144d1de91a7c8" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 2658816, "records": [ { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 2304, 256 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 2304, 64 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 294912, "byteOffset": 2359296 }, { "name": "model.norm.weight", "shape": [ 2304 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4608, "byteOffset": 2654208 } ], "md5sum": "b25ced867ce5edd57446a06cffea6a7b" } ] }