{ "metadata": { "ParamSize": 485, "ParamBytes": 12415229952.0, "BitsPerParam": 4.553632488924832 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "716263458277ff83a3c46d6ce71cb2e7" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "822af490c3f00cb92dee80f82efdaf4a" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 24391680, "records": [ { "name": "model.layers.0.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 }, { "name": "model.layers.0.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18874368 }, { "name": "model.layers.0.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21233664 }, { "name": "model.layers.0.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24379392 } ], "md5sum": "50b5a8c597ee10cf9bc7cb4fa7f42de1" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.0.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "62f9bc64bbaf245d93e96588af3f77ab" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 284295168, "records": [ { "name": "model.tok_embeddings.q_weight", "shape": [ 92544, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 284295168, "byteOffset": 0 } ], "md5sum": "33023165380a281e29e6c0891a18ac15" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 35536896, "records": [ { "name": "model.tok_embeddings.q_scale", "shape": [ 92544, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 35536896, "byteOffset": 0 } ], "md5sum": "f263786d832df47074583e9f354a1e13" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.1.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "96b447fd7c2184937db674437dab9843" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bd69b76c0a28499d89219b5b2f3e76a9" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "01cfc9de14070a6e1bba13da2cc46949" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.0.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.0.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.0.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.1.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.1.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.1.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "d3b17fe7e5eab772e75afd02d45b4ab2" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.1.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5d8f188559e6a439e59d930b7d9fa579" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.2.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4005d53bcbf910fda00b8c4d5b5cb7e5" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3fead2bf931e44cad7e79d48ef0a9fe0" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "9fb1045f34211028c510010ebc95af7a" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.1.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.1.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.1.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.2.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.2.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.2.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "f2b7684535c4547cacc6c5bab05f4041" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.2.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "99a0ee2dd8cbb1c9fa47528046b7a4fb" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.3.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1ff931500a186275ffda8e7ac75f8126" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "36946b926dc1a54a33fb39691e988b08" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "df7b7f2e51e7ebc661f331965fdf5f0a" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 24391680, "records": [ { "name": "model.layers.2.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.2.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.2.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.3.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.3.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 } ], "md5sum": "e4dd5b6a9627a4304eee1deff0acd1b1" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 31457280, "records": [ { "name": "model.layers.3.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 12582912 } ], "md5sum": "6fc3d4915773bfe4dfebcea75363a20a" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.10.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e2397bf08637903fc2a829b061482ca1" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 30683136, "records": [ { "name": "model.layers.10.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.10.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 2359296 }, { "name": "model.layers.10.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 27525120 }, { "name": "model.layers.10.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 30670848 } ], "md5sum": "ed3624d2577bb72001b0a17b93dfedb8" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.10.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c3b4cc8feb72e32c6fb5696a05a0532e" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.8.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "84e5ba0b76e2db1877f489dc52bdc75a" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.8.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3606aad64721f86f69283b5133489ce3" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25190400, "records": [ { "name": "model.layers.10.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.10.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.10.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.8.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18886656 }, { "name": "model.layers.8.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18898944 } ], "md5sum": "07ef6dbefa61b37f670f813e86614242" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.8.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.8.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.9.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 12595200 } ], "md5sum": "408803a0fe7b923743147c7d8b281d9a" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.9.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "f32990c8b6b6cfd96d17b24679657a04" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 30683136, "records": [ { "name": "model.layers.9.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 0 }, { "name": "model.layers.9.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 2359296 }, { "name": "model.layers.9.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 27525120 }, { "name": "model.layers.9.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 30670848 } ], "md5sum": "851267d838d3844ba290933e0285b082" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.9.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "092412258cddab7562d5970316992976" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.11.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4fe5531ac1f7423ae43a4c3737762985" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a2d0819e5f0643f229b31032c911bb6b" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.11.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "276e17f46f2cbdcd6eacdcea31d5cf93" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.9.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.9.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.9.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.11.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.11.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.11.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "abccfb40e6ef9ca480dfaa0bdc229c9a" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.11.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "47bc26bedf0a1eedbb2e9ff244cd6932" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.12.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "13edcba6d8c43e1f82991df743109648" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "22e564a20bd07c599d6f3ce0f018c44c" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.12.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "47835e2aa415d479974c59b3a0c8cee1" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.11.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.11.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.11.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.12.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.12.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.12.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "a96b9c6c4c575b3c546947635a6dd63a" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.12.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "afbc03dc736ceae546912fb4d602a404" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.13.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "788f8f9ee1fce1c764d5fe5565b24fef" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5be02f711ede85af6c8fd2e685460895" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.13.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "96783c79f409f8cb260abe2458f924f1" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 24391680, "records": [ { "name": "model.layers.12.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.12.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.12.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.13.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.13.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 } ], "md5sum": "7d2656749fb8278089bc5f53aa728d9a" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.13.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0911001f29254765971b9b9261f96c05" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.14.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "15b3c5a6d8fbd0579e09f4b8066e65f8" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "47c3387f854279a100a9481d9a51b9a0" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.14.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8dd5beb541e184bfe823f60e99e12ee7" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 24416256, "records": [ { "name": "model.layers.13.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.13.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.13.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12595200 }, { "name": "model.layers.13.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18886656 }, { "name": "model.layers.14.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18898944 }, { "name": "model.layers.14.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21258240 }, { "name": "model.layers.14.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24403968 } ], "md5sum": "4f9a9df58c2aff42f1257dcc06207ba7" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.14.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5dc255fcaa6d2ebedbbd6472e60374ce" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.15.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "658f2e2bc6d693803d5f492492d0b867" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b7ef6ebe7ffca7a347a955e13f41991a" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.15.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "4001079367da021cb7a6410cebacbba5" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.14.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.14.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.14.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.15.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.15.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.15.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "b4c542629ed6200aa40b2f59ef5dbd27" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.15.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2d0a1783c3fc9b6bf243122ba4c1341f" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.16.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "fc62520dfe3a7f801c884d6f40f53ebc" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "c4291ebdf8a976c43f5e89c8fb701f43" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.16.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "1f6930048b35b479c57fcd0bf50e8688" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.15.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.15.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.15.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.16.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.16.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.16.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "765778843b2671508c7657f45de48916" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.16.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9c4505b49e2a22b0139e8e911d7814d4" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.17.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b9e40859330c26e12cd30dd32b5ce0b3" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8c2d6fdf5b6b478a30e07043a3bd46b1" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.17.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c4bad6632cec11ce9fff010672657e18" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.16.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.16.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.16.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.17.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.17.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.17.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "98fc53327d48d9c8d549c3ef03091250" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.17.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "458a01858d11b6976e83ca67736d3882" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.18.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "18c9e8b2fa36b81fc91efd282dc56843" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "352092fa3a117c54053ae74394a9bfe8" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.18.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "36e2be42bc529079655991e9b1b0edd3" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 24391680, "records": [ { "name": "model.layers.17.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.17.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.17.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.18.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.18.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 } ], "md5sum": "7039044fe6e1ed73356fc6584ce04d07" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.18.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3d3578cfa01d104cfc3bc5fb14802dce" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.19.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5ccf97c4e498af5f5e42b28d36978c0d" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bd031a77cc067ea75ae980e3135df59a" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.19.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b9ede40e028dc538c97d1296b87ac926" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 24416256, "records": [ { "name": "model.layers.18.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.18.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.18.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12595200 }, { "name": "model.layers.18.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18886656 }, { "name": "model.layers.19.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18898944 }, { "name": "model.layers.19.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21258240 }, { "name": "model.layers.19.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24403968 } ], "md5sum": "36419df511374884ec6090653aaeb703" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.19.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f2236181b94ba9eded8b77a9e4c36c21" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.20.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "363672288a990054f3d91a0d8e0515ee" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d37d60cab3032a3e757527f86f4e4378" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.20.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d8d02c403e85c7c9e1288930fe08a4f7" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.19.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.19.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.19.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.20.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.20.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.20.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "61d073aa2944c35e2b12fa4eba7ab1d5" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.20.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a2962b4f452105c59e8a3932f6ce4aa5" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.21.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "08a928683c06e6c1ce52ea864f61aca9" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e6283e7a1b84a3d55300a55ed28a6345" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.21.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "ae5326ba4129ad8475fd590beaf15ad4" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.20.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.20.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.20.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.21.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.21.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.21.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "fd2568e049aa6b649b10778f5832e779" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.21.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "cb8b21ad7e16ddda20dcfe5b434a6cf6" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.22.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e273312afefc1908838ce1592500fe45" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.22.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "38ec7c3ae25f6cf3b213faa0fb3c9f9e" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.22.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "c020d25711b0ad5ccd81542d70da6e70" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.21.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.21.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.21.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.22.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.22.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.22.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "a117d3cfab3c419189f8ba368c9ffbb4" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.22.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "d9e505593dc5161b19011c04d5ecb149" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.23.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0a69edded9673c9fbd3e206dc1988b57" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.23.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "36a206c339057ab110a2af28d365f9fa" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.23.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "259de1775cdf657963951eed311acff4" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 24391680, "records": [ { "name": "model.layers.22.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.22.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.22.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.23.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.23.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 } ], "md5sum": "7f1cdee70a8d250ae940f0da77aedc30" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.23.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "858f520d3eba4f505cd4ad77858ac9a1" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.24.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d2a19673da5ad3ca66026da29356cff0" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "3ce2414d0c1094222fdf42eee9aecbbd" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.24.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "77195066b9e95ad84c99fc004b152000" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 24416256, "records": [ { "name": "model.layers.23.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.23.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.23.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12595200 }, { "name": "model.layers.23.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18886656 }, { "name": "model.layers.24.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18898944 }, { "name": "model.layers.24.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21258240 }, { "name": "model.layers.24.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24403968 } ], "md5sum": "5e234a56183a9aa0d155e7633f368b7a" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.24.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "544ca198afede6252dd0ef1d7f56a1c2" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.25.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "699af39911b5cb6a162166ed0e738a43" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "afe08664a54f124489628536dc9dc1a8" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.25.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5ed3c5448897387d6d0e4f8b0f00b8ce" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.24.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.24.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.24.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.25.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.25.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.25.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "806004dd038b2d3592e1923663c0579f" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.25.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "1f14970a54bf3ddd401abaf6a95c1e37" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.26.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "1857d752a47db500c22f548fc65e551a" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "80c9bdff2d0767a1e44637f0cc9a14c1" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.26.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d32a9438c7b57feeb9c599a86f133f57" }, { "dataPath": "params_shard_109.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.25.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.25.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.25.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.26.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.26.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.26.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "a627109c55de246ec1dc39e8748c7916" }, { "dataPath": "params_shard_110.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.26.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "7f9d39c8744f99c5503f0b46715d3731" }, { "dataPath": "params_shard_111.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.27.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "8bbe9836c83d51b01f0cc798d9366484" }, { "dataPath": "params_shard_112.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0840dfdf3a249cf22f01842c716a240b" }, { "dataPath": "params_shard_113.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.27.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "57de330e38f0bdd92dc46e16db89dcbe" }, { "dataPath": "params_shard_114.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.26.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.26.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.26.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.27.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.27.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.27.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "bc452cab7df8eb6689b650424af6e37d" }, { "dataPath": "params_shard_115.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.27.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ac81d55f14703238822d9b7142eb3e80" }, { "dataPath": "params_shard_116.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.28.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "0d15a18b11c0ac7d141197a3cffd5d6e" }, { "dataPath": "params_shard_117.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "52d301feff3e97d539dad635a388616a" }, { "dataPath": "params_shard_118.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.28.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d941e93f775fa1980acce970a71d0d10" }, { "dataPath": "params_shard_119.bin", "format": "raw-shard", "nbytes": 24391680, "records": [ { "name": "model.layers.27.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.27.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.27.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.28.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.28.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 } ], "md5sum": "527d53dfcf345f55e10f186b6c67c29a" }, { "dataPath": "params_shard_120.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.28.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "48a8ee459fde350836b3ab5d309ec52f" }, { "dataPath": "params_shard_121.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.29.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "098e8c76a136c1af57c88ee5475bd61c" }, { "dataPath": "params_shard_122.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fe0bb7ec73c16c294611edc6035b1c7b" }, { "dataPath": "params_shard_123.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.29.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "a19259de1d8530f22a6cc98f1c611800" }, { "dataPath": "params_shard_124.bin", "format": "raw-shard", "nbytes": 24416256, "records": [ { "name": "model.layers.28.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.28.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.28.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12595200 }, { "name": "model.layers.28.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18886656 }, { "name": "model.layers.29.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18898944 }, { "name": "model.layers.29.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21258240 }, { "name": "model.layers.29.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24403968 } ], "md5sum": "f781555716073f1376db51ab11b14068" }, { "dataPath": "params_shard_125.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.29.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "15a15b960c29c3b3150742696675df13" }, { "dataPath": "params_shard_126.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.30.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "479daeebb45ac3afcc64cff340e78108" }, { "dataPath": "params_shard_127.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.30.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "80a7f78774f60ade2deae5e1c7181ea7" }, { "dataPath": "params_shard_128.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.30.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "2dd02cfca66b70346c9616533218d801" }, { "dataPath": "params_shard_129.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.29.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.29.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.29.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.30.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.30.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.30.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "e7ba1704f926e8b5a18b95b5ae14d6de" }, { "dataPath": "params_shard_130.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.30.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a31e87571b5c1603c6675bdcb640bb86" }, { "dataPath": "params_shard_131.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.3.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "68a3a9847f2f5e96a98a5ff5b3cc22e1" }, { "dataPath": "params_shard_132.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.4.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "d6635932076ee7ae60d6870246fe4492" }, { "dataPath": "params_shard_133.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "dcf222a54df8826a64447ef22238e7d8" }, { "dataPath": "params_shard_134.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3b989b5910ca7ab8a0967d84a65e9ed3" }, { "dataPath": "params_shard_135.bin", "format": "raw-shard", "nbytes": 30720000, "records": [ { "name": "model.layers.30.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.30.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.30.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.3.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18886656 }, { "name": "model.layers.3.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 18898944 }, { "name": "model.layers.3.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 25190400 }, { "name": "model.layers.4.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 25202688 }, { "name": "model.layers.4.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 27561984 }, { "name": "model.layers.4.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 30707712 } ], "md5sum": "0079ced022acdc63d69bda6f15245559" }, { "dataPath": "params_shard_136.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.4.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "41379eda820249a81211dfa434f4a979" }, { "dataPath": "params_shard_137.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.5.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e38e3ceaefb4234f352beb9c96f342e2" }, { "dataPath": "params_shard_138.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f965941c2373d926264d5ec949873ac4" }, { "dataPath": "params_shard_139.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "60a71fdd8a778cf14738a95918be6c3c" }, { "dataPath": "params_shard_140.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.4.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.4.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.4.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.5.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.5.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.5.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "05d739cd6795ea011976c3c2065888b0" }, { "dataPath": "params_shard_141.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.5.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "87d22d8e36802a73d718706c7d6fd96b" }, { "dataPath": "params_shard_142.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.31.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "fa2a297bd096534817b06bea667d2fa1" }, { "dataPath": "params_shard_143.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.31.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0b29599af09de4cffdf24edb87700cbe" }, { "dataPath": "params_shard_144.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.31.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8cc7babf63523b3c6e44751ff942f402" }, { "dataPath": "params_shard_145.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.5.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.5.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.5.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.31.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.31.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.31.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "c8d51561127726d565757b921fca3a76" }, { "dataPath": "params_shard_146.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.31.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "50e816d5c9fe602559ee2d4ea4295b8c" }, { "dataPath": "params_shard_147.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.32.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ab33239cbbd77eec2fae075c335172cb" }, { "dataPath": "params_shard_148.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.32.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "74c2e02fb8491c4910b234d7d4d45e13" }, { "dataPath": "params_shard_149.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.32.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "26e5fa91d9891f78cbd91980c2bc0ac8" }, { "dataPath": "params_shard_150.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.31.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.31.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.31.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.32.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.32.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.32.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "18c03d01f5c37b6451fe5ee339b4a090" }, { "dataPath": "params_shard_151.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.32.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "6a0859ea08b7e301b0b5018dcd56ffe5" }, { "dataPath": "params_shard_152.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.33.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "f79c9d511dc802fc225946963fd3e77e" }, { "dataPath": "params_shard_153.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.33.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6689fbe1c7068caf0b123fb0c0cf342f" }, { "dataPath": "params_shard_154.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.33.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "3da8de763ec40e535dc1f4e3f885c8a9" }, { "dataPath": "params_shard_155.bin", "format": "raw-shard", "nbytes": 24391680, "records": [ { "name": "model.layers.32.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.32.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.32.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.33.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.33.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 } ], "md5sum": "b4fcf771afaf81fcd94e6d274bf572e0" }, { "dataPath": "params_shard_156.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.33.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "11a6fdaca1a73e009506350ec82e7c8d" }, { "dataPath": "params_shard_157.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.34.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "76b7046f612b016d686346e7f0ab8127" }, { "dataPath": "params_shard_158.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.34.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "48a82ffa3e55a1732cf8e2564b6482c1" }, { "dataPath": "params_shard_159.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.34.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "b7bdec7af04ef0ec9ee769e40167d747" }, { "dataPath": "params_shard_160.bin", "format": "raw-shard", "nbytes": 24416256, "records": [ { "name": "model.layers.33.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.33.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.33.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12595200 }, { "name": "model.layers.33.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18886656 }, { "name": "model.layers.34.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18898944 }, { "name": "model.layers.34.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21258240 }, { "name": "model.layers.34.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24403968 } ], "md5sum": "89e1ebeec79650c27272f2d01add47bb" }, { "dataPath": "params_shard_161.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.34.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "9ae36c126677a6da92e38af573f4dcbb" }, { "dataPath": "params_shard_162.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.35.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5a9e70ef4180df15959b4b74f0bdd515" }, { "dataPath": "params_shard_163.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.35.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "77dd68aeced77062d5cf579e5e4b8183" }, { "dataPath": "params_shard_164.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.35.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "0abb12874483071b1b93c5f7a73590c6" }, { "dataPath": "params_shard_165.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.34.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.34.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.34.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.35.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.35.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.35.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "bc0d7a412fbe8ce335f2bc37bfa628f3" }, { "dataPath": "params_shard_166.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.35.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "86bcdda8511f80761893c656f0dbd87a" }, { "dataPath": "params_shard_167.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.36.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4d9f421d1a9dd97afc88992b4c0be1b5" }, { "dataPath": "params_shard_168.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.36.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "acef8c9ca86ad8bf288a5a4ebaf2b5c3" }, { "dataPath": "params_shard_169.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.36.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e018ef6bc65f9e8336f3d0b7618b28c6" }, { "dataPath": "params_shard_170.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.35.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.35.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.35.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.36.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.36.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.36.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "2664b046847fb7fb932e82f7332a3cec" }, { "dataPath": "params_shard_171.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.36.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "c3b8e5a9559b78be63350c9bee34d3be" }, { "dataPath": "params_shard_172.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.37.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "6d9ff175b218398c37f7bd0e4060198d" }, { "dataPath": "params_shard_173.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.37.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "2aa61dffe2702ea1973c4b43103ade7e" }, { "dataPath": "params_shard_174.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.37.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "df985b133d2cff7d287af4c24532bed9" }, { "dataPath": "params_shard_175.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.36.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.36.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.36.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.37.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.37.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.37.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "4a87b3ace7cb42a5db8301cd85057455" }, { "dataPath": "params_shard_176.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.37.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "f196502bedfa81e74f6dcf6de936490d" }, { "dataPath": "params_shard_177.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.38.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "3227ae87c4db22aa11c73c124354a829" }, { "dataPath": "params_shard_178.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.38.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "035ee951305be1f8e29640d8808a1335" }, { "dataPath": "params_shard_179.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.38.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "64f4ff82bef9ce34c10a2c14502882e9" }, { "dataPath": "params_shard_180.bin", "format": "raw-shard", "nbytes": 24391680, "records": [ { "name": "model.layers.37.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.37.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.37.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.38.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.38.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 } ], "md5sum": "6d707a3f880cb92bde7991add4deb11a" }, { "dataPath": "params_shard_181.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.38.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "bc1d8c93e48262e777acf4ef2c649ab1" }, { "dataPath": "params_shard_182.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.39.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "b606c4df0ac229b6f763bd45caed24c5" }, { "dataPath": "params_shard_183.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.39.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cac397c873151fc28493cd5cc8ac8462" }, { "dataPath": "params_shard_184.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.39.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "5b282a4e0f1f620883a8e8f4b2243fbc" }, { "dataPath": "params_shard_185.bin", "format": "raw-shard", "nbytes": 24416256, "records": [ { "name": "model.layers.38.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.38.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.38.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12595200 }, { "name": "model.layers.38.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18886656 }, { "name": "model.layers.39.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18898944 }, { "name": "model.layers.39.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21258240 }, { "name": "model.layers.39.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24403968 } ], "md5sum": "4a6621b0ff6c3aafb5ade1ae71a951f4" }, { "dataPath": "params_shard_186.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.39.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "80100a4ad9abf476cf0b6e213e0f955d" }, { "dataPath": "params_shard_187.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.40.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9319877fac8932660b8c29d51c5eb2c3" }, { "dataPath": "params_shard_188.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.40.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bb3c5dd9b0337706c619c817dc593b8e" }, { "dataPath": "params_shard_189.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.40.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6c6dd74c402afca8a0021b936bbe20c5" }, { "dataPath": "params_shard_190.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.39.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.39.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.39.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.40.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.40.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.40.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "a51684dc81a50644d97e5fe4e27b8937" }, { "dataPath": "params_shard_191.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.40.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "4c33ba7c0c4fdc575bedeffe80980d67" }, { "dataPath": "params_shard_192.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.41.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "5a0b42892c96af35138bc83ee3230170" }, { "dataPath": "params_shard_193.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.41.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4fa5a22a474349d9bbe54cbcfe380016" }, { "dataPath": "params_shard_194.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.41.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e267bd8f1ec923d86374074c1babe27b" }, { "dataPath": "params_shard_195.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.40.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.40.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.40.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.41.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.41.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.41.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "b80fcb15f5506390b151c83f3097396b" }, { "dataPath": "params_shard_196.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.41.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "27a8111120c8bdad801ab2b5280b2854" }, { "dataPath": "params_shard_197.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.42.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "9bbca7ec1cac3c634bbeeedbf4c01862" }, { "dataPath": "params_shard_198.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.42.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d5ea3f07bbbbe2f5c812579a52c1e43b" }, { "dataPath": "params_shard_199.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.42.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "6ab6a928a629b9562e8db68dc0612cfc" }, { "dataPath": "params_shard_200.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.41.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.41.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.41.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.42.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.42.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.42.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "8bae582e3014ae7efa050d1de9ce12f7" }, { "dataPath": "params_shard_201.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.42.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a13f83f8d1c9a51a34119326b3a3c828" }, { "dataPath": "params_shard_202.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.43.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "968cdba022ccef891817812ba7a2d163" }, { "dataPath": "params_shard_203.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.43.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bb19b59688203488294c00dc49cea71e" }, { "dataPath": "params_shard_204.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.43.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8ae40bf7740a1300ea953a2f44b07595" }, { "dataPath": "params_shard_205.bin", "format": "raw-shard", "nbytes": 24391680, "records": [ { "name": "model.layers.42.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.42.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.42.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.43.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.43.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 } ], "md5sum": "86be1e4044031ddc2eb042e526bf4ef8" }, { "dataPath": "params_shard_206.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.43.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "00912badda50c26a43466995f1443980" }, { "dataPath": "params_shard_207.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.44.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "74a0c168eccfbb4d87cf4762c23e204e" }, { "dataPath": "params_shard_208.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.44.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f5a2599d267a16f90abed4080a013ef5" }, { "dataPath": "params_shard_209.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.44.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e0ba2c861e4ef88f69ec409d64781188" }, { "dataPath": "params_shard_210.bin", "format": "raw-shard", "nbytes": 24416256, "records": [ { "name": "model.layers.43.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.43.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 12582912 }, { "name": "model.layers.43.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12595200 }, { "name": "model.layers.43.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18886656 }, { "name": "model.layers.44.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18898944 }, { "name": "model.layers.44.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21258240 }, { "name": "model.layers.44.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24403968 } ], "md5sum": "802f24e1497755b57a89d729135bd6b4" }, { "dataPath": "params_shard_211.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.44.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "02085c4103357d5048262894e0675070" }, { "dataPath": "params_shard_212.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.45.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "fe669c4806d8f28e248fe20cb549babb" }, { "dataPath": "params_shard_213.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.45.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "cf374452ba9da844957b1c0c3cd302df" }, { "dataPath": "params_shard_214.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.45.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "df3962f80bba9c8c0162c31883bbf122" }, { "dataPath": "params_shard_215.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.44.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.44.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.44.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.45.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.45.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.45.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "8b6d92aa1f4065ee97a36717eaa14051" }, { "dataPath": "params_shard_216.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.45.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "dba137a9150d8c55e433e3f9e9f3eb9e" }, { "dataPath": "params_shard_217.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.46.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "ac2e15081a42527e7be1c2101b8bda37" }, { "dataPath": "params_shard_218.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.46.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4e1f762a8d42c40f1f14587c6f6a3dae" }, { "dataPath": "params_shard_219.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.46.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "e39e392dbf164e60bcaae36e40fb88cc" }, { "dataPath": "params_shard_220.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.45.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.45.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.45.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.46.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.46.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.46.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "c9eb04ef4688c98c401ef8694b0cfac7" }, { "dataPath": "params_shard_221.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.46.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3a0429b050d5a684557b5ceda801d368" }, { "dataPath": "params_shard_222.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.47.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "37d4053e28465b15285dafd92c614ce5" }, { "dataPath": "params_shard_223.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.47.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ce7b92f08c11481b1c48ad5e30328adf" }, { "dataPath": "params_shard_224.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.47.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "90f23a4989070baa382c4f00452f3ae6" }, { "dataPath": "params_shard_225.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.46.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.46.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.46.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.47.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.47.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.47.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "a7a383f57edd1043efe922aff58a3511" }, { "dataPath": "params_shard_226.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.47.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "b32c806d61e9c98773795137ba661f58" }, { "dataPath": "params_shard_227.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.6.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "4bcf5fee33bca1c01df61baae15965c4" }, { "dataPath": "params_shard_228.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "0b5773ba6f4403a73e0e97cd5bcc97ca" }, { "dataPath": "params_shard_229.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.6.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "cec69728dee67ce16f80840dc1994e47" }, { "dataPath": "params_shard_230.bin", "format": "raw-shard", "nbytes": 24416256, "records": [ { "name": "model.layers.47.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.47.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.47.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18886656 }, { "name": "model.layers.6.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18898944 }, { "name": "model.layers.6.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21258240 }, { "name": "model.layers.6.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24403968 } ], "md5sum": "a051292576a8fc00a20c884549ff23a6" }, { "dataPath": "params_shard_231.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.6.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5bd97984ebb108261b2d8981f5775f4d" }, { "dataPath": "params_shard_232.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.7.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "e8515fc212f5fb9326e9917d5aeadf7f" }, { "dataPath": "params_shard_233.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "23b8daedb91c8cf9953227c3fa0abeb6" }, { "dataPath": "params_shard_234.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.7.feed_forward.gate_up_proj.q_weight", "shape": [ 32768, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "8c465c9a9b5cd6d3127d54bb394425c1" }, { "dataPath": "params_shard_235.bin", "format": "raw-shard", "nbytes": 24403968, "records": [ { "name": "model.layers.6.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.6.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.6.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.7.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.7.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 }, { "name": "model.layers.7.attention_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 24391680 } ], "md5sum": "e233e3602a93e876e918b12cda350ff8" }, { "dataPath": "params_shard_236.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.7.feed_forward.w2.q_weight", "shape": [ 6144, 2048 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "da608faf06dea26b5e238c7a3fa4b172" }, { "dataPath": "params_shard_237.bin", "format": "raw-shard", "nbytes": 18874368, "records": [ { "name": "model.layers.8.attention.wo.q_weight", "shape": [ 6144, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 18874368, "byteOffset": 0 } ], "md5sum": "a41338d85c921853598a7ea236bc2340" }, { "dataPath": "params_shard_238.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.attention.wqkv.q_weight", "shape": [ 8192, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "bc1c6b272d262b186515042428a533e8" }, { "dataPath": "params_shard_239.bin", "format": "raw-shard", "nbytes": 284295168, "records": [ { "name": "output.q_weight", "shape": [ 92544, 768 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 284295168, "byteOffset": 0 } ], "md5sum": "97903f18a37886602bf0fb396e853bc9" }, { "dataPath": "params_shard_240.bin", "format": "raw-shard", "nbytes": 35536896, "records": [ { "name": "output.q_scale", "shape": [ 92544, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 35536896, "byteOffset": 0 } ], "md5sum": "1f1edc7d0bca4464d3e33796f650d039" }, { "dataPath": "params_shard_241.bin", "format": "raw-shard", "nbytes": 24391680, "records": [ { "name": "model.layers.7.feed_forward.gate_up_proj.q_scale", "shape": [ 32768, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12582912, "byteOffset": 0 }, { "name": "model.layers.7.feed_forward.w2.q_scale", "shape": [ 6144, 512 ], "dtype": "bfloat16", "format": "raw", "nbytes": 6291456, "byteOffset": 12582912 }, { "name": "model.layers.7.ffn_norm.weight", "shape": [ 6144 ], "dtype": "bfloat16", "format": "raw", "nbytes": 12288, "byteOffset": 18874368 }, { "name": "model.layers.8.attention.wo.q_scale", "shape": [ 6144, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2359296, "byteOffset": 18886656 }, { "name": "model.layers.8.attention.wqkv.q_scale", "shape": [ 8192, 192 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21245952 } ], "md5sum": "0bf2bc92da8dddfc1b427182b67dbf8e" } ] }