|
{ |
|
"metadata": { |
|
"ParamSize": 485, |
|
"ParamBytes": 12415229952.0, |
|
"BitsPerParam": 4.553632488924832 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "716263458277ff83a3c46d6ce71cb2e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "822af490c3f00cb92dee80f82efdaf4a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21233664 |
|
}, |
|
{ |
|
"name": "model.layers.0.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24379392 |
|
} |
|
], |
|
"md5sum": "50b5a8c597ee10cf9bc7cb4fa7f42de1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "62f9bc64bbaf245d93e96588af3f77ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 284295168, |
|
"records": [ |
|
{ |
|
"name": "model.tok_embeddings.q_weight", |
|
"shape": [ |
|
92544, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 284295168, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "33023165380a281e29e6c0891a18ac15" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35536896, |
|
"records": [ |
|
{ |
|
"name": "model.tok_embeddings.q_scale", |
|
"shape": [ |
|
92544, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 35536896, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f263786d832df47074583e9f354a1e13" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96b447fd7c2184937db674437dab9843" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bd69b76c0a28499d89219b5b2f3e76a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "01cfc9de14070a6e1bba13da2cc46949" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.0.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.1.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "d3b17fe7e5eab772e75afd02d45b4ab2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5d8f188559e6a439e59d930b7d9fa579" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4005d53bcbf910fda00b8c4d5b5cb7e5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3fead2bf931e44cad7e79d48ef0a9fe0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9fb1045f34211028c510010ebc95af7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.1.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.1.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.2.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "f2b7684535c4547cacc6c5bab05f4041" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "99a0ee2dd8cbb1c9fa47528046b7a4fb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ff931500a186275ffda8e7ac75f8126" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "36946b926dc1a54a33fb39691e988b08" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "df7b7f2e51e7ebc661f331965fdf5f0a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.2.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.2.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "e4dd5b6a9627a4304eee1deff0acd1b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31457280, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 12582912 |
|
} |
|
], |
|
"md5sum": "6fc3d4915773bfe4dfebcea75363a20a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e2397bf08637903fc2a829b061482ca1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30683136, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 2359296 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 27525120 |
|
}, |
|
{ |
|
"name": "model.layers.10.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 30670848 |
|
} |
|
], |
|
"md5sum": "ed3624d2577bb72001b0a17b93dfedb8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3b4cc8feb72e32c6fb5696a05a0532e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "84e5ba0b76e2db1877f489dc52bdc75a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3606aad64721f86f69283b5133489ce3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25190400, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.10.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.10.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.8.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 18898944 |
|
} |
|
], |
|
"md5sum": "07ef6dbefa61b37f670f813e86614242" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 31469568, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.8.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 12595200 |
|
} |
|
], |
|
"md5sum": "408803a0fe7b923743147c7d8b281d9a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f32990c8b6b6cfd96d17b24679657a04" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30683136, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 2359296 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 27525120 |
|
}, |
|
{ |
|
"name": "model.layers.9.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 30670848 |
|
} |
|
], |
|
"md5sum": "851267d838d3844ba290933e0285b082" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "092412258cddab7562d5970316992976" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4fe5531ac1f7423ae43a4c3737762985" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a2d0819e5f0643f229b31032c911bb6b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "276e17f46f2cbdcd6eacdcea31d5cf93" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.9.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.9.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.11.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "abccfb40e6ef9ca480dfaa0bdc229c9a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "47bc26bedf0a1eedbb2e9ff244cd6932" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "13edcba6d8c43e1f82991df743109648" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "22e564a20bd07c599d6f3ce0f018c44c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "47835e2aa415d479974c59b3a0c8cee1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.11.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.11.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.12.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "a96b9c6c4c575b3c546947635a6dd63a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "afbc03dc736ceae546912fb4d602a404" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "788f8f9ee1fce1c764d5fe5565b24fef" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5be02f711ede85af6c8fd2e685460895" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "96783c79f409f8cb260abe2458f924f1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.12.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.12.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "7d2656749fb8278089bc5f53aa728d9a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0911001f29254765971b9b9261f96c05" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "15b3c5a6d8fbd0579e09f4b8066e65f8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "47c3387f854279a100a9481d9a51b9a0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8dd5beb541e184bfe823f60e99e12ee7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.13.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.13.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.13.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.14.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "4f9a9df58c2aff42f1257dcc06207ba7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5dc255fcaa6d2ebedbbd6472e60374ce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "658f2e2bc6d693803d5f492492d0b867" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b7ef6ebe7ffca7a347a955e13f41991a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4001079367da021cb7a6410cebacbba5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.14.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.14.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.15.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "b4c542629ed6200aa40b2f59ef5dbd27" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2d0a1783c3fc9b6bf243122ba4c1341f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fc62520dfe3a7f801c884d6f40f53ebc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c4291ebdf8a976c43f5e89c8fb701f43" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f6930048b35b479c57fcd0bf50e8688" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.15.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.15.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.16.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "765778843b2671508c7657f45de48916" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9c4505b49e2a22b0139e8e911d7814d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9e40859330c26e12cd30dd32b5ce0b3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8c2d6fdf5b6b478a30e07043a3bd46b1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c4bad6632cec11ce9fff010672657e18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.16.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.16.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.17.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "98fc53327d48d9c8d549c3ef03091250" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "458a01858d11b6976e83ca67736d3882" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "18c9e8b2fa36b81fc91efd282dc56843" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "352092fa3a117c54053ae74394a9bfe8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "36e2be42bc529079655991e9b1b0edd3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.17.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.17.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "7039044fe6e1ed73356fc6584ce04d07" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3d3578cfa01d104cfc3bc5fb14802dce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ccf97c4e498af5f5e42b28d36978c0d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bd031a77cc067ea75ae980e3135df59a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b9ede40e028dc538c97d1296b87ac926" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.18.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.18.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.18.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.19.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "36419df511374884ec6090653aaeb703" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f2236181b94ba9eded8b77a9e4c36c21" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "363672288a990054f3d91a0d8e0515ee" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d37d60cab3032a3e757527f86f4e4378" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d8d02c403e85c7c9e1288930fe08a4f7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.19.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.19.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.20.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "61d073aa2944c35e2b12fa4eba7ab1d5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a2962b4f452105c59e8a3932f6ce4aa5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "08a928683c06e6c1ce52ea864f61aca9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e6283e7a1b84a3d55300a55ed28a6345" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ae5326ba4129ad8475fd590beaf15ad4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.20.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.20.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.21.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "fd2568e049aa6b649b10778f5832e779" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cb8b21ad7e16ddda20dcfe5b434a6cf6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e273312afefc1908838ce1592500fe45" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "38ec7c3ae25f6cf3b213faa0fb3c9f9e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c020d25711b0ad5ccd81542d70da6e70" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.21.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.21.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.22.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "a117d3cfab3c419189f8ba368c9ffbb4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d9e505593dc5161b19011c04d5ecb149" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0a69edded9673c9fbd3e206dc1988b57" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "36a206c339057ab110a2af28d365f9fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "259de1775cdf657963951eed311acff4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.22.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "7f1cdee70a8d250ae940f0da77aedc30" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "858f520d3eba4f505cd4ad77858ac9a1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d2a19673da5ad3ca66026da29356cff0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3ce2414d0c1094222fdf42eee9aecbbd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "77195066b9e95ad84c99fc004b152000" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.23.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.23.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.23.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.24.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "5e234a56183a9aa0d155e7633f368b7a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "544ca198afede6252dd0ef1d7f56a1c2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "699af39911b5cb6a162166ed0e738a43" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "afe08664a54f124489628536dc9dc1a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5ed3c5448897387d6d0e4f8b0f00b8ce" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.24.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.24.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.25.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "806004dd038b2d3592e1923663c0579f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f14970a54bf3ddd401abaf6a95c1e37" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1857d752a47db500c22f548fc65e551a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "80c9bdff2d0767a1e44637f0cc9a14c1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d32a9438c7b57feeb9c599a86f133f57" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.25.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.25.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.26.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "a627109c55de246ec1dc39e8748c7916" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7f9d39c8744f99c5503f0b46715d3731" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8bbe9836c83d51b01f0cc798d9366484" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0840dfdf3a249cf22f01842c716a240b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "57de330e38f0bdd92dc46e16db89dcbe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_114.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.26.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.26.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.27.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "bc452cab7df8eb6689b650424af6e37d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_115.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ac81d55f14703238822d9b7142eb3e80" |
|
}, |
|
{ |
|
"dataPath": "params_shard_116.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0d15a18b11c0ac7d141197a3cffd5d6e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_117.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "52d301feff3e97d539dad635a388616a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_118.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d941e93f775fa1980acce970a71d0d10" |
|
}, |
|
{ |
|
"dataPath": "params_shard_119.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.27.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.27.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "527d53dfcf345f55e10f186b6c67c29a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_120.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48a8ee459fde350836b3ab5d309ec52f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_121.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "098e8c76a136c1af57c88ee5475bd61c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_122.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fe0bb7ec73c16c294611edc6035b1c7b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_123.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a19259de1d8530f22a6cc98f1c611800" |
|
}, |
|
{ |
|
"dataPath": "params_shard_124.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.28.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.28.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.28.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.28.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.29.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "f781555716073f1376db51ab11b14068" |
|
}, |
|
{ |
|
"dataPath": "params_shard_125.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "15a15b960c29c3b3150742696675df13" |
|
}, |
|
{ |
|
"dataPath": "params_shard_126.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "479daeebb45ac3afcc64cff340e78108" |
|
}, |
|
{ |
|
"dataPath": "params_shard_127.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "80a7f78774f60ade2deae5e1c7181ea7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_128.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2dd02cfca66b70346c9616533218d801" |
|
}, |
|
{ |
|
"dataPath": "params_shard_129.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.29.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.29.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.29.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.30.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "e7ba1704f926e8b5a18b95b5ae14d6de" |
|
}, |
|
{ |
|
"dataPath": "params_shard_130.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a31e87571b5c1603c6675bdcb640bb86" |
|
}, |
|
{ |
|
"dataPath": "params_shard_131.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "68a3a9847f2f5e96a98a5ff5b3cc22e1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_132.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d6635932076ee7ae60d6870246fe4492" |
|
}, |
|
{ |
|
"dataPath": "params_shard_133.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dcf222a54df8826a64447ef22238e7d8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_134.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3b989b5910ca7ab8a0967d84a65e9ed3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_135.bin", |
|
"format": "raw-shard", |
|
"nbytes": 30720000, |
|
"records": [ |
|
{ |
|
"name": "model.layers.30.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.30.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.30.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.3.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.3.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.3.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 25190400 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 25202688 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 27561984 |
|
}, |
|
{ |
|
"name": "model.layers.4.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 30707712 |
|
} |
|
], |
|
"md5sum": "0079ced022acdc63d69bda6f15245559" |
|
}, |
|
{ |
|
"dataPath": "params_shard_136.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "41379eda820249a81211dfa434f4a979" |
|
}, |
|
{ |
|
"dataPath": "params_shard_137.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e38e3ceaefb4234f352beb9c96f342e2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_138.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f965941c2373d926264d5ec949873ac4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_139.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "60a71fdd8a778cf14738a95918be6c3c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_140.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.4.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.4.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.5.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "05d739cd6795ea011976c3c2065888b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_141.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "87d22d8e36802a73d718706c7d6fd96b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_142.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fa2a297bd096534817b06bea667d2fa1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_143.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b29599af09de4cffdf24edb87700cbe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_144.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8cc7babf63523b3c6e44751ff942f402" |
|
}, |
|
{ |
|
"dataPath": "params_shard_145.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.5.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.5.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.31.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "c8d51561127726d565757b921fca3a76" |
|
}, |
|
{ |
|
"dataPath": "params_shard_146.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "50e816d5c9fe602559ee2d4ea4295b8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_147.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ab33239cbbd77eec2fae075c335172cb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_148.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "74c2e02fb8491c4910b234d7d4d45e13" |
|
}, |
|
{ |
|
"dataPath": "params_shard_149.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "26e5fa91d9891f78cbd91980c2bc0ac8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_150.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.31.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.31.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.31.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.32.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.32.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.32.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "18c03d01f5c37b6451fe5ee339b4a090" |
|
}, |
|
{ |
|
"dataPath": "params_shard_151.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6a0859ea08b7e301b0b5018dcd56ffe5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_152.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f79c9d511dc802fc225946963fd3e77e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_153.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6689fbe1c7068caf0b123fb0c0cf342f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_154.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3da8de763ec40e535dc1f4e3f885c8a9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_155.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.32.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.32.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.32.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.33.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.33.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "b4fcf771afaf81fcd94e6d274bf572e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_156.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "11a6fdaca1a73e009506350ec82e7c8d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_157.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "76b7046f612b016d686346e7f0ab8127" |
|
}, |
|
{ |
|
"dataPath": "params_shard_158.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "48a82ffa3e55a1732cf8e2564b6482c1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_159.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b7bdec7af04ef0ec9ee769e40167d747" |
|
}, |
|
{ |
|
"dataPath": "params_shard_160.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.33.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.33.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.33.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.33.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.34.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.34.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.34.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "89e1ebeec79650c27272f2d01add47bb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_161.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9ae36c126677a6da92e38af573f4dcbb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_162.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5a9e70ef4180df15959b4b74f0bdd515" |
|
}, |
|
{ |
|
"dataPath": "params_shard_163.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "77dd68aeced77062d5cf579e5e4b8183" |
|
}, |
|
{ |
|
"dataPath": "params_shard_164.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0abb12874483071b1b93c5f7a73590c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_165.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.34.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.34.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.34.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.35.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.35.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.35.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "bc0d7a412fbe8ce335f2bc37bfa628f3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_166.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "86bcdda8511f80761893c656f0dbd87a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_167.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4d9f421d1a9dd97afc88992b4c0be1b5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_168.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "acef8c9ca86ad8bf288a5a4ebaf2b5c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_169.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e018ef6bc65f9e8336f3d0b7618b28c6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_170.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.35.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.35.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.35.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.36.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.36.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.36.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "2664b046847fb7fb932e82f7332a3cec" |
|
}, |
|
{ |
|
"dataPath": "params_shard_171.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c3b8e5a9559b78be63350c9bee34d3be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_172.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d9ff175b218398c37f7bd0e4060198d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_173.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2aa61dffe2702ea1973c4b43103ade7e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_174.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "df985b133d2cff7d287af4c24532bed9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_175.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.36.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.36.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.36.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.37.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.37.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.37.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "4a87b3ace7cb42a5db8301cd85057455" |
|
}, |
|
{ |
|
"dataPath": "params_shard_176.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f196502bedfa81e74f6dcf6de936490d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_177.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3227ae87c4db22aa11c73c124354a829" |
|
}, |
|
{ |
|
"dataPath": "params_shard_178.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "035ee951305be1f8e29640d8808a1335" |
|
}, |
|
{ |
|
"dataPath": "params_shard_179.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "64f4ff82bef9ce34c10a2c14502882e9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_180.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.37.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.37.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.37.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.38.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.38.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "6d707a3f880cb92bde7991add4deb11a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_181.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc1d8c93e48262e777acf4ef2c649ab1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_182.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b606c4df0ac229b6f763bd45caed24c5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_183.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cac397c873151fc28493cd5cc8ac8462" |
|
}, |
|
{ |
|
"dataPath": "params_shard_184.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5b282a4e0f1f620883a8e8f4b2243fbc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_185.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.38.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.38.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.38.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.38.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.39.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.39.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.39.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "4a6621b0ff6c3aafb5ade1ae71a951f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_186.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "80100a4ad9abf476cf0b6e213e0f955d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_187.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9319877fac8932660b8c29d51c5eb2c3" |
|
}, |
|
{ |
|
"dataPath": "params_shard_188.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb3c5dd9b0337706c619c817dc593b8e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_189.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6c6dd74c402afca8a0021b936bbe20c5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_190.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.39.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.39.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.39.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.40.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.40.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.40.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "a51684dc81a50644d97e5fe4e27b8937" |
|
}, |
|
{ |
|
"dataPath": "params_shard_191.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4c33ba7c0c4fdc575bedeffe80980d67" |
|
}, |
|
{ |
|
"dataPath": "params_shard_192.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5a0b42892c96af35138bc83ee3230170" |
|
}, |
|
{ |
|
"dataPath": "params_shard_193.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4fa5a22a474349d9bbe54cbcfe380016" |
|
}, |
|
{ |
|
"dataPath": "params_shard_194.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e267bd8f1ec923d86374074c1babe27b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_195.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.40.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.40.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.40.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.41.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.41.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.41.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "b80fcb15f5506390b151c83f3097396b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_196.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "27a8111120c8bdad801ab2b5280b2854" |
|
}, |
|
{ |
|
"dataPath": "params_shard_197.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9bbca7ec1cac3c634bbeeedbf4c01862" |
|
}, |
|
{ |
|
"dataPath": "params_shard_198.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d5ea3f07bbbbe2f5c812579a52c1e43b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_199.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6ab6a928a629b9562e8db68dc0612cfc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_200.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.41.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.41.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.41.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.42.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.42.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.42.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "8bae582e3014ae7efa050d1de9ce12f7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_201.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a13f83f8d1c9a51a34119326b3a3c828" |
|
}, |
|
{ |
|
"dataPath": "params_shard_202.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "968cdba022ccef891817812ba7a2d163" |
|
}, |
|
{ |
|
"dataPath": "params_shard_203.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bb19b59688203488294c00dc49cea71e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_204.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8ae40bf7740a1300ea953a2f44b07595" |
|
}, |
|
{ |
|
"dataPath": "params_shard_205.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.42.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.42.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.42.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.43.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.43.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "86be1e4044031ddc2eb042e526bf4ef8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_206.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "00912badda50c26a43466995f1443980" |
|
}, |
|
{ |
|
"dataPath": "params_shard_207.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "74a0c168eccfbb4d87cf4762c23e204e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_208.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f5a2599d267a16f90abed4080a013ef5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_209.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e0ba2c861e4ef88f69ec409d64781188" |
|
}, |
|
{ |
|
"dataPath": "params_shard_210.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.43.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.43.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.43.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12595200 |
|
}, |
|
{ |
|
"name": "model.layers.43.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.44.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.44.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.44.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "802f24e1497755b57a89d729135bd6b4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_211.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "02085c4103357d5048262894e0675070" |
|
}, |
|
{ |
|
"dataPath": "params_shard_212.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fe669c4806d8f28e248fe20cb549babb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_213.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cf374452ba9da844957b1c0c3cd302df" |
|
}, |
|
{ |
|
"dataPath": "params_shard_214.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "df3962f80bba9c8c0162c31883bbf122" |
|
}, |
|
{ |
|
"dataPath": "params_shard_215.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.44.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.44.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.44.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.45.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.45.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.45.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "8b6d92aa1f4065ee97a36717eaa14051" |
|
}, |
|
{ |
|
"dataPath": "params_shard_216.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dba137a9150d8c55e433e3f9e9f3eb9e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_217.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ac2e15081a42527e7be1c2101b8bda37" |
|
}, |
|
{ |
|
"dataPath": "params_shard_218.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4e1f762a8d42c40f1f14587c6f6a3dae" |
|
}, |
|
{ |
|
"dataPath": "params_shard_219.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e39e392dbf164e60bcaae36e40fb88cc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_220.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.45.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.45.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.45.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.46.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.46.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.46.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "c9eb04ef4688c98c401ef8694b0cfac7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_221.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3a0429b050d5a684557b5ceda801d368" |
|
}, |
|
{ |
|
"dataPath": "params_shard_222.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "37d4053e28465b15285dafd92c614ce5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_223.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ce7b92f08c11481b1c48ad5e30328adf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_224.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "90f23a4989070baa382c4f00452f3ae6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_225.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.46.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.46.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.46.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.47.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.47.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.47.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "a7a383f57edd1043efe922aff58a3511" |
|
}, |
|
{ |
|
"dataPath": "params_shard_226.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b32c806d61e9c98773795137ba661f58" |
|
}, |
|
{ |
|
"dataPath": "params_shard_227.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4bcf5fee33bca1c01df61baae15965c4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_228.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0b5773ba6f4403a73e0e97cd5bcc97ca" |
|
}, |
|
{ |
|
"dataPath": "params_shard_229.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cec69728dee67ce16f80840dc1994e47" |
|
}, |
|
{ |
|
"dataPath": "params_shard_230.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24416256, |
|
"records": [ |
|
{ |
|
"name": "model.layers.47.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.47.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.47.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18898944 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21258240 |
|
}, |
|
{ |
|
"name": "model.layers.6.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24403968 |
|
} |
|
], |
|
"md5sum": "a051292576a8fc00a20c884549ff23a6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_231.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5bd97984ebb108261b2d8981f5775f4d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_232.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e8515fc212f5fb9326e9917d5aeadf7f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_233.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "23b8daedb91c8cf9953227c3fa0abeb6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_234.bin", |
|
"format": "raw-shard", |
|
"nbytes": 100663296, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.gate_up_proj.q_weight", |
|
"shape": [ |
|
32768, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 100663296, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8c465c9a9b5cd6d3127d54bb394425c1" |
|
}, |
|
{ |
|
"dataPath": "params_shard_235.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24403968, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.6.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.6.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
}, |
|
{ |
|
"name": "model.layers.7.attention_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 24391680 |
|
} |
|
], |
|
"md5sum": "e233e3602a93e876e918b12cda350ff8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_236.bin", |
|
"format": "raw-shard", |
|
"nbytes": 50331648, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.w2.q_weight", |
|
"shape": [ |
|
6144, |
|
2048 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 50331648, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "da608faf06dea26b5e238c7a3fa4b172" |
|
}, |
|
{ |
|
"dataPath": "params_shard_237.bin", |
|
"format": "raw-shard", |
|
"nbytes": 18874368, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.attention.wo.q_weight", |
|
"shape": [ |
|
6144, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 18874368, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a41338d85c921853598a7ea236bc2340" |
|
}, |
|
{ |
|
"dataPath": "params_shard_238.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25165824, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.attention.wqkv.q_weight", |
|
"shape": [ |
|
8192, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25165824, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bc1c6b272d262b186515042428a533e8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_239.bin", |
|
"format": "raw-shard", |
|
"nbytes": 284295168, |
|
"records": [ |
|
{ |
|
"name": "output.q_weight", |
|
"shape": [ |
|
92544, |
|
768 |
|
], |
|
"dtype": "uint32", |
|
"format": "f32-to-bf16", |
|
"nbytes": 284295168, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "97903f18a37886602bf0fb396e853bc9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_240.bin", |
|
"format": "raw-shard", |
|
"nbytes": 35536896, |
|
"records": [ |
|
{ |
|
"name": "output.q_scale", |
|
"shape": [ |
|
92544, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 35536896, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1f1edc7d0bca4464d3e33796f650d039" |
|
}, |
|
{ |
|
"dataPath": "params_shard_241.bin", |
|
"format": "raw-shard", |
|
"nbytes": 24391680, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.feed_forward.gate_up_proj.q_scale", |
|
"shape": [ |
|
32768, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12582912, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.7.feed_forward.w2.q_scale", |
|
"shape": [ |
|
6144, |
|
512 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 6291456, |
|
"byteOffset": 12582912 |
|
}, |
|
{ |
|
"name": "model.layers.7.ffn_norm.weight", |
|
"shape": [ |
|
6144 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 12288, |
|
"byteOffset": 18874368 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wo.q_scale", |
|
"shape": [ |
|
6144, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 2359296, |
|
"byteOffset": 18886656 |
|
}, |
|
{ |
|
"name": "model.layers.8.attention.wqkv.q_scale", |
|
"shape": [ |
|
8192, |
|
192 |
|
], |
|
"dtype": "bfloat16", |
|
"format": "raw", |
|
"nbytes": 3145728, |
|
"byteOffset": 21245952 |
|
} |
|
], |
|
"md5sum": "0bf2bc92da8dddfc1b427182b67dbf8e" |
|
} |
|
] |
|
} |