internlm2_5-20b-chat-q4f32_1-MLC / ndarray-cache-b16.json
riczhou's picture
Upload folder using huggingface_hub
c028536 verified
{
"metadata": {
"ParamSize": 485,
"ParamBytes": 12415229952.0,
"BitsPerParam": 4.553632488924832
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.0.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "716263458277ff83a3c46d6ce71cb2e7"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.0.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "822af490c3f00cb92dee80f82efdaf4a"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.0.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
},
{
"name": "model.layers.0.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18874368
},
{
"name": "model.layers.0.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21233664
},
{
"name": "model.layers.0.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24379392
}
],
"md5sum": "50b5a8c597ee10cf9bc7cb4fa7f42de1"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.0.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "62f9bc64bbaf245d93e96588af3f77ab"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 284295168,
"records": [
{
"name": "model.tok_embeddings.q_weight",
"shape": [
92544,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 284295168,
"byteOffset": 0
}
],
"md5sum": "33023165380a281e29e6c0891a18ac15"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 35536896,
"records": [
{
"name": "model.tok_embeddings.q_scale",
"shape": [
92544,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35536896,
"byteOffset": 0
}
],
"md5sum": "f263786d832df47074583e9f354a1e13"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.1.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "96b447fd7c2184937db674437dab9843"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.1.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bd69b76c0a28499d89219b5b2f3e76a9"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.1.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "01cfc9de14070a6e1bba13da2cc46949"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.0.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.0.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.0.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.1.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.1.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.1.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "d3b17fe7e5eab772e75afd02d45b4ab2"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.1.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5d8f188559e6a439e59d930b7d9fa579"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.2.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4005d53bcbf910fda00b8c4d5b5cb7e5"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.2.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3fead2bf931e44cad7e79d48ef0a9fe0"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.2.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "9fb1045f34211028c510010ebc95af7a"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.1.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.1.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.1.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.2.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.2.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.2.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "f2b7684535c4547cacc6c5bab05f4041"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.2.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "99a0ee2dd8cbb1c9fa47528046b7a4fb"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.3.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "1ff931500a186275ffda8e7ac75f8126"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.3.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "36946b926dc1a54a33fb39691e988b08"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.3.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "df7b7f2e51e7ebc661f331965fdf5f0a"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.2.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.2.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.2.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.3.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.3.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "e4dd5b6a9627a4304eee1deff0acd1b1"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 31457280,
"records": [
{
"name": "model.layers.3.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 12582912
}
],
"md5sum": "6fc3d4915773bfe4dfebcea75363a20a"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.10.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e2397bf08637903fc2a829b061482ca1"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 30683136,
"records": [
{
"name": "model.layers.10.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.10.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 2359296
},
{
"name": "model.layers.10.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 27525120
},
{
"name": "model.layers.10.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 30670848
}
],
"md5sum": "ed3624d2577bb72001b0a17b93dfedb8"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.10.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "c3b4cc8feb72e32c6fb5696a05a0532e"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.8.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "84e5ba0b76e2db1877f489dc52bdc75a"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.8.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "3606aad64721f86f69283b5133489ce3"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 25190400,
"records": [
{
"name": "model.layers.10.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.10.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.10.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.8.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.8.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 18898944
}
],
"md5sum": "07ef6dbefa61b37f670f813e86614242"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.8.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.8.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.9.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 12595200
}
],
"md5sum": "408803a0fe7b923743147c7d8b281d9a"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.9.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "f32990c8b6b6cfd96d17b24679657a04"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 30683136,
"records": [
{
"name": "model.layers.9.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 0
},
{
"name": "model.layers.9.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 2359296
},
{
"name": "model.layers.9.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 27525120
},
{
"name": "model.layers.9.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 30670848
}
],
"md5sum": "851267d838d3844ba290933e0285b082"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.9.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "092412258cddab7562d5970316992976"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.11.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4fe5531ac1f7423ae43a4c3737762985"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.11.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "a2d0819e5f0643f229b31032c911bb6b"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.11.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "276e17f46f2cbdcd6eacdcea31d5cf93"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.9.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.9.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.9.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.11.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.11.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.11.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "abccfb40e6ef9ca480dfaa0bdc229c9a"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.11.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "47bc26bedf0a1eedbb2e9ff244cd6932"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.12.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "13edcba6d8c43e1f82991df743109648"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.12.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "22e564a20bd07c599d6f3ce0f018c44c"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.12.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "47835e2aa415d479974c59b3a0c8cee1"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.11.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.11.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.11.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.12.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.12.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.12.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "a96b9c6c4c575b3c546947635a6dd63a"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.12.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "afbc03dc736ceae546912fb4d602a404"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.13.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "788f8f9ee1fce1c764d5fe5565b24fef"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.13.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "5be02f711ede85af6c8fd2e685460895"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.13.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "96783c79f409f8cb260abe2458f924f1"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.12.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.12.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.12.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.13.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.13.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "7d2656749fb8278089bc5f53aa728d9a"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.13.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "0911001f29254765971b9b9261f96c05"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.14.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "15b3c5a6d8fbd0579e09f4b8066e65f8"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.14.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "47c3387f854279a100a9481d9a51b9a0"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.14.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8dd5beb541e184bfe823f60e99e12ee7"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.13.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.13.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.13.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.13.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.14.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.14.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.14.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "4f9a9df58c2aff42f1257dcc06207ba7"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.14.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5dc255fcaa6d2ebedbbd6472e60374ce"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.15.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "658f2e2bc6d693803d5f492492d0b867"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.15.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "b7ef6ebe7ffca7a347a955e13f41991a"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.15.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "4001079367da021cb7a6410cebacbba5"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.14.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.14.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.14.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.15.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.15.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.15.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "b4c542629ed6200aa40b2f59ef5dbd27"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.15.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "2d0a1783c3fc9b6bf243122ba4c1341f"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.16.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "fc62520dfe3a7f801c884d6f40f53ebc"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.16.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "c4291ebdf8a976c43f5e89c8fb701f43"
},
{
"dataPath": "params_shard_58.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.16.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "1f6930048b35b479c57fcd0bf50e8688"
},
{
"dataPath": "params_shard_59.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.15.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.15.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.15.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.16.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.16.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.16.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "765778843b2671508c7657f45de48916"
},
{
"dataPath": "params_shard_60.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.16.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "9c4505b49e2a22b0139e8e911d7814d4"
},
{
"dataPath": "params_shard_61.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.17.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "b9e40859330c26e12cd30dd32b5ce0b3"
},
{
"dataPath": "params_shard_62.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.17.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "8c2d6fdf5b6b478a30e07043a3bd46b1"
},
{
"dataPath": "params_shard_63.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.17.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "c4bad6632cec11ce9fff010672657e18"
},
{
"dataPath": "params_shard_64.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.16.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.16.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.16.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.17.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.17.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.17.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "98fc53327d48d9c8d549c3ef03091250"
},
{
"dataPath": "params_shard_65.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.17.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "458a01858d11b6976e83ca67736d3882"
},
{
"dataPath": "params_shard_66.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.18.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "18c9e8b2fa36b81fc91efd282dc56843"
},
{
"dataPath": "params_shard_67.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.18.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "352092fa3a117c54053ae74394a9bfe8"
},
{
"dataPath": "params_shard_68.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.18.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "36e2be42bc529079655991e9b1b0edd3"
},
{
"dataPath": "params_shard_69.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.17.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.17.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.17.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.18.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.18.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "7039044fe6e1ed73356fc6584ce04d07"
},
{
"dataPath": "params_shard_70.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.18.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "3d3578cfa01d104cfc3bc5fb14802dce"
},
{
"dataPath": "params_shard_71.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.19.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "5ccf97c4e498af5f5e42b28d36978c0d"
},
{
"dataPath": "params_shard_72.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.19.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bd031a77cc067ea75ae980e3135df59a"
},
{
"dataPath": "params_shard_73.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.19.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b9ede40e028dc538c97d1296b87ac926"
},
{
"dataPath": "params_shard_74.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.18.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.18.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.18.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.18.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.19.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.19.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.19.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "36419df511374884ec6090653aaeb703"
},
{
"dataPath": "params_shard_75.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.19.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f2236181b94ba9eded8b77a9e4c36c21"
},
{
"dataPath": "params_shard_76.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.20.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "363672288a990054f3d91a0d8e0515ee"
},
{
"dataPath": "params_shard_77.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.20.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d37d60cab3032a3e757527f86f4e4378"
},
{
"dataPath": "params_shard_78.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.20.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d8d02c403e85c7c9e1288930fe08a4f7"
},
{
"dataPath": "params_shard_79.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.19.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.19.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.19.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.20.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.20.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.20.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "61d073aa2944c35e2b12fa4eba7ab1d5"
},
{
"dataPath": "params_shard_80.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.20.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a2962b4f452105c59e8a3932f6ce4aa5"
},
{
"dataPath": "params_shard_81.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.21.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "08a928683c06e6c1ce52ea864f61aca9"
},
{
"dataPath": "params_shard_82.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.21.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "e6283e7a1b84a3d55300a55ed28a6345"
},
{
"dataPath": "params_shard_83.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.21.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "ae5326ba4129ad8475fd590beaf15ad4"
},
{
"dataPath": "params_shard_84.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.20.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.20.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.20.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.21.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.21.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.21.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "fd2568e049aa6b649b10778f5832e779"
},
{
"dataPath": "params_shard_85.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.21.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "cb8b21ad7e16ddda20dcfe5b434a6cf6"
},
{
"dataPath": "params_shard_86.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.22.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "e273312afefc1908838ce1592500fe45"
},
{
"dataPath": "params_shard_87.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.22.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "38ec7c3ae25f6cf3b213faa0fb3c9f9e"
},
{
"dataPath": "params_shard_88.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.22.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "c020d25711b0ad5ccd81542d70da6e70"
},
{
"dataPath": "params_shard_89.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.21.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.21.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.21.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.22.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.22.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.22.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "a117d3cfab3c419189f8ba368c9ffbb4"
},
{
"dataPath": "params_shard_90.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.22.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "d9e505593dc5161b19011c04d5ecb149"
},
{
"dataPath": "params_shard_91.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.23.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0a69edded9673c9fbd3e206dc1988b57"
},
{
"dataPath": "params_shard_92.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.23.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "36a206c339057ab110a2af28d365f9fa"
},
{
"dataPath": "params_shard_93.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.23.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "259de1775cdf657963951eed311acff4"
},
{
"dataPath": "params_shard_94.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.22.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.22.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.22.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.23.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.23.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "7f1cdee70a8d250ae940f0da77aedc30"
},
{
"dataPath": "params_shard_95.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.23.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "858f520d3eba4f505cd4ad77858ac9a1"
},
{
"dataPath": "params_shard_96.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.24.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "d2a19673da5ad3ca66026da29356cff0"
},
{
"dataPath": "params_shard_97.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.24.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "3ce2414d0c1094222fdf42eee9aecbbd"
},
{
"dataPath": "params_shard_98.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.24.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "77195066b9e95ad84c99fc004b152000"
},
{
"dataPath": "params_shard_99.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.23.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.23.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.23.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.23.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.24.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.24.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.24.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "5e234a56183a9aa0d155e7633f368b7a"
},
{
"dataPath": "params_shard_100.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.24.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "544ca198afede6252dd0ef1d7f56a1c2"
},
{
"dataPath": "params_shard_101.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.25.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "699af39911b5cb6a162166ed0e738a43"
},
{
"dataPath": "params_shard_102.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.25.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "afe08664a54f124489628536dc9dc1a8"
},
{
"dataPath": "params_shard_103.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.25.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5ed3c5448897387d6d0e4f8b0f00b8ce"
},
{
"dataPath": "params_shard_104.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.24.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.24.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.24.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.25.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.25.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.25.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "806004dd038b2d3592e1923663c0579f"
},
{
"dataPath": "params_shard_105.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.25.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "1f14970a54bf3ddd401abaf6a95c1e37"
},
{
"dataPath": "params_shard_106.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.26.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "1857d752a47db500c22f548fc65e551a"
},
{
"dataPath": "params_shard_107.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.26.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "80c9bdff2d0767a1e44637f0cc9a14c1"
},
{
"dataPath": "params_shard_108.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.26.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d32a9438c7b57feeb9c599a86f133f57"
},
{
"dataPath": "params_shard_109.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.25.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.25.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.25.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.26.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.26.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.26.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "a627109c55de246ec1dc39e8748c7916"
},
{
"dataPath": "params_shard_110.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.26.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "7f9d39c8744f99c5503f0b46715d3731"
},
{
"dataPath": "params_shard_111.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.27.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "8bbe9836c83d51b01f0cc798d9366484"
},
{
"dataPath": "params_shard_112.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.27.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0840dfdf3a249cf22f01842c716a240b"
},
{
"dataPath": "params_shard_113.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.27.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "57de330e38f0bdd92dc46e16db89dcbe"
},
{
"dataPath": "params_shard_114.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.26.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.26.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.26.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.27.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.27.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.27.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "bc452cab7df8eb6689b650424af6e37d"
},
{
"dataPath": "params_shard_115.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.27.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "ac81d55f14703238822d9b7142eb3e80"
},
{
"dataPath": "params_shard_116.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.28.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "0d15a18b11c0ac7d141197a3cffd5d6e"
},
{
"dataPath": "params_shard_117.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.28.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "52d301feff3e97d539dad635a388616a"
},
{
"dataPath": "params_shard_118.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.28.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d941e93f775fa1980acce970a71d0d10"
},
{
"dataPath": "params_shard_119.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.27.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.27.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.27.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.28.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.28.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "527d53dfcf345f55e10f186b6c67c29a"
},
{
"dataPath": "params_shard_120.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.28.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "48a8ee459fde350836b3ab5d309ec52f"
},
{
"dataPath": "params_shard_121.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.29.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "098e8c76a136c1af57c88ee5475bd61c"
},
{
"dataPath": "params_shard_122.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.29.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "fe0bb7ec73c16c294611edc6035b1c7b"
},
{
"dataPath": "params_shard_123.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.29.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "a19259de1d8530f22a6cc98f1c611800"
},
{
"dataPath": "params_shard_124.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.28.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.28.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.28.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.28.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.29.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.29.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.29.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "f781555716073f1376db51ab11b14068"
},
{
"dataPath": "params_shard_125.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.29.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "15a15b960c29c3b3150742696675df13"
},
{
"dataPath": "params_shard_126.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.30.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "479daeebb45ac3afcc64cff340e78108"
},
{
"dataPath": "params_shard_127.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.30.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "80a7f78774f60ade2deae5e1c7181ea7"
},
{
"dataPath": "params_shard_128.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.30.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "2dd02cfca66b70346c9616533218d801"
},
{
"dataPath": "params_shard_129.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.29.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.29.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.29.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.30.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.30.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.30.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "e7ba1704f926e8b5a18b95b5ae14d6de"
},
{
"dataPath": "params_shard_130.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.30.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a31e87571b5c1603c6675bdcb640bb86"
},
{
"dataPath": "params_shard_131.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.3.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "68a3a9847f2f5e96a98a5ff5b3cc22e1"
},
{
"dataPath": "params_shard_132.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.4.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "d6635932076ee7ae60d6870246fe4492"
},
{
"dataPath": "params_shard_133.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.4.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "dcf222a54df8826a64447ef22238e7d8"
},
{
"dataPath": "params_shard_134.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.4.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "3b989b5910ca7ab8a0967d84a65e9ed3"
},
{
"dataPath": "params_shard_135.bin",
"format": "raw-shard",
"nbytes": 30720000,
"records": [
{
"name": "model.layers.30.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.30.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.30.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.3.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.3.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 18898944
},
{
"name": "model.layers.3.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 25190400
},
{
"name": "model.layers.4.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 25202688
},
{
"name": "model.layers.4.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 27561984
},
{
"name": "model.layers.4.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 30707712
}
],
"md5sum": "0079ced022acdc63d69bda6f15245559"
},
{
"dataPath": "params_shard_136.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.4.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "41379eda820249a81211dfa434f4a979"
},
{
"dataPath": "params_shard_137.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.5.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "e38e3ceaefb4234f352beb9c96f342e2"
},
{
"dataPath": "params_shard_138.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.5.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f965941c2373d926264d5ec949873ac4"
},
{
"dataPath": "params_shard_139.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.5.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "60a71fdd8a778cf14738a95918be6c3c"
},
{
"dataPath": "params_shard_140.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.4.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.4.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.4.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.5.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.5.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.5.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "05d739cd6795ea011976c3c2065888b0"
},
{
"dataPath": "params_shard_141.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.5.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "87d22d8e36802a73d718706c7d6fd96b"
},
{
"dataPath": "params_shard_142.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.31.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "fa2a297bd096534817b06bea667d2fa1"
},
{
"dataPath": "params_shard_143.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.31.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0b29599af09de4cffdf24edb87700cbe"
},
{
"dataPath": "params_shard_144.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.31.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8cc7babf63523b3c6e44751ff942f402"
},
{
"dataPath": "params_shard_145.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.5.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.5.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.5.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.31.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.31.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.31.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "c8d51561127726d565757b921fca3a76"
},
{
"dataPath": "params_shard_146.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.31.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "50e816d5c9fe602559ee2d4ea4295b8c"
},
{
"dataPath": "params_shard_147.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.32.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "ab33239cbbd77eec2fae075c335172cb"
},
{
"dataPath": "params_shard_148.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.32.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "74c2e02fb8491c4910b234d7d4d45e13"
},
{
"dataPath": "params_shard_149.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.32.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "26e5fa91d9891f78cbd91980c2bc0ac8"
},
{
"dataPath": "params_shard_150.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.31.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.31.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.31.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.32.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.32.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.32.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "18c03d01f5c37b6451fe5ee339b4a090"
},
{
"dataPath": "params_shard_151.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.32.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "6a0859ea08b7e301b0b5018dcd56ffe5"
},
{
"dataPath": "params_shard_152.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.33.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "f79c9d511dc802fc225946963fd3e77e"
},
{
"dataPath": "params_shard_153.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.33.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "6689fbe1c7068caf0b123fb0c0cf342f"
},
{
"dataPath": "params_shard_154.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.33.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "3da8de763ec40e535dc1f4e3f885c8a9"
},
{
"dataPath": "params_shard_155.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.32.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.32.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.32.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.33.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.33.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "b4fcf771afaf81fcd94e6d274bf572e0"
},
{
"dataPath": "params_shard_156.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.33.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "11a6fdaca1a73e009506350ec82e7c8d"
},
{
"dataPath": "params_shard_157.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.34.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "76b7046f612b016d686346e7f0ab8127"
},
{
"dataPath": "params_shard_158.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.34.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "48a82ffa3e55a1732cf8e2564b6482c1"
},
{
"dataPath": "params_shard_159.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.34.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "b7bdec7af04ef0ec9ee769e40167d747"
},
{
"dataPath": "params_shard_160.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.33.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.33.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.33.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.33.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.34.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.34.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.34.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "89e1ebeec79650c27272f2d01add47bb"
},
{
"dataPath": "params_shard_161.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.34.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "9ae36c126677a6da92e38af573f4dcbb"
},
{
"dataPath": "params_shard_162.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.35.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "5a9e70ef4180df15959b4b74f0bdd515"
},
{
"dataPath": "params_shard_163.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.35.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "77dd68aeced77062d5cf579e5e4b8183"
},
{
"dataPath": "params_shard_164.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.35.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "0abb12874483071b1b93c5f7a73590c6"
},
{
"dataPath": "params_shard_165.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.34.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.34.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.34.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.35.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.35.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.35.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "bc0d7a412fbe8ce335f2bc37bfa628f3"
},
{
"dataPath": "params_shard_166.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.35.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "86bcdda8511f80761893c656f0dbd87a"
},
{
"dataPath": "params_shard_167.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.36.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4d9f421d1a9dd97afc88992b4c0be1b5"
},
{
"dataPath": "params_shard_168.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.36.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "acef8c9ca86ad8bf288a5a4ebaf2b5c3"
},
{
"dataPath": "params_shard_169.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.36.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e018ef6bc65f9e8336f3d0b7618b28c6"
},
{
"dataPath": "params_shard_170.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.35.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.35.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.35.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.36.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.36.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.36.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "2664b046847fb7fb932e82f7332a3cec"
},
{
"dataPath": "params_shard_171.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.36.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "c3b8e5a9559b78be63350c9bee34d3be"
},
{
"dataPath": "params_shard_172.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.37.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "6d9ff175b218398c37f7bd0e4060198d"
},
{
"dataPath": "params_shard_173.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.37.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "2aa61dffe2702ea1973c4b43103ade7e"
},
{
"dataPath": "params_shard_174.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.37.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "df985b133d2cff7d287af4c24532bed9"
},
{
"dataPath": "params_shard_175.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.36.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.36.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.36.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.37.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.37.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.37.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "4a87b3ace7cb42a5db8301cd85057455"
},
{
"dataPath": "params_shard_176.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.37.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "f196502bedfa81e74f6dcf6de936490d"
},
{
"dataPath": "params_shard_177.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.38.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "3227ae87c4db22aa11c73c124354a829"
},
{
"dataPath": "params_shard_178.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.38.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "035ee951305be1f8e29640d8808a1335"
},
{
"dataPath": "params_shard_179.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.38.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "64f4ff82bef9ce34c10a2c14502882e9"
},
{
"dataPath": "params_shard_180.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.37.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.37.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.37.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.38.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.38.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "6d707a3f880cb92bde7991add4deb11a"
},
{
"dataPath": "params_shard_181.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.38.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "bc1d8c93e48262e777acf4ef2c649ab1"
},
{
"dataPath": "params_shard_182.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.39.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "b606c4df0ac229b6f763bd45caed24c5"
},
{
"dataPath": "params_shard_183.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.39.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cac397c873151fc28493cd5cc8ac8462"
},
{
"dataPath": "params_shard_184.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.39.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "5b282a4e0f1f620883a8e8f4b2243fbc"
},
{
"dataPath": "params_shard_185.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.38.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.38.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.38.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.38.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.39.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.39.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.39.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "4a6621b0ff6c3aafb5ade1ae71a951f4"
},
{
"dataPath": "params_shard_186.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.39.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "80100a4ad9abf476cf0b6e213e0f955d"
},
{
"dataPath": "params_shard_187.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.40.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "9319877fac8932660b8c29d51c5eb2c3"
},
{
"dataPath": "params_shard_188.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.40.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bb3c5dd9b0337706c619c817dc593b8e"
},
{
"dataPath": "params_shard_189.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.40.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "6c6dd74c402afca8a0021b936bbe20c5"
},
{
"dataPath": "params_shard_190.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.39.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.39.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.39.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.40.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.40.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.40.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "a51684dc81a50644d97e5fe4e27b8937"
},
{
"dataPath": "params_shard_191.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.40.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "4c33ba7c0c4fdc575bedeffe80980d67"
},
{
"dataPath": "params_shard_192.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.41.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "5a0b42892c96af35138bc83ee3230170"
},
{
"dataPath": "params_shard_193.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.41.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4fa5a22a474349d9bbe54cbcfe380016"
},
{
"dataPath": "params_shard_194.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.41.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e267bd8f1ec923d86374074c1babe27b"
},
{
"dataPath": "params_shard_195.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.40.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.40.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.40.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.41.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.41.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.41.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "b80fcb15f5506390b151c83f3097396b"
},
{
"dataPath": "params_shard_196.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.41.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "27a8111120c8bdad801ab2b5280b2854"
},
{
"dataPath": "params_shard_197.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.42.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "9bbca7ec1cac3c634bbeeedbf4c01862"
},
{
"dataPath": "params_shard_198.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.42.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "d5ea3f07bbbbe2f5c812579a52c1e43b"
},
{
"dataPath": "params_shard_199.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.42.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "6ab6a928a629b9562e8db68dc0612cfc"
},
{
"dataPath": "params_shard_200.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.41.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.41.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.41.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.42.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.42.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.42.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "8bae582e3014ae7efa050d1de9ce12f7"
},
{
"dataPath": "params_shard_201.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.42.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "a13f83f8d1c9a51a34119326b3a3c828"
},
{
"dataPath": "params_shard_202.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.43.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "968cdba022ccef891817812ba7a2d163"
},
{
"dataPath": "params_shard_203.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.43.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bb19b59688203488294c00dc49cea71e"
},
{
"dataPath": "params_shard_204.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.43.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8ae40bf7740a1300ea953a2f44b07595"
},
{
"dataPath": "params_shard_205.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.42.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.42.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.42.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.43.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.43.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "86be1e4044031ddc2eb042e526bf4ef8"
},
{
"dataPath": "params_shard_206.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.43.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "00912badda50c26a43466995f1443980"
},
{
"dataPath": "params_shard_207.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.44.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "74a0c168eccfbb4d87cf4762c23e204e"
},
{
"dataPath": "params_shard_208.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.44.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "f5a2599d267a16f90abed4080a013ef5"
},
{
"dataPath": "params_shard_209.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.44.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e0ba2c861e4ef88f69ec409d64781188"
},
{
"dataPath": "params_shard_210.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.43.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.43.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 12582912
},
{
"name": "model.layers.43.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12595200
},
{
"name": "model.layers.43.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.44.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.44.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.44.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "802f24e1497755b57a89d729135bd6b4"
},
{
"dataPath": "params_shard_211.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.44.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "02085c4103357d5048262894e0675070"
},
{
"dataPath": "params_shard_212.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.45.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "fe669c4806d8f28e248fe20cb549babb"
},
{
"dataPath": "params_shard_213.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.45.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "cf374452ba9da844957b1c0c3cd302df"
},
{
"dataPath": "params_shard_214.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.45.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "df3962f80bba9c8c0162c31883bbf122"
},
{
"dataPath": "params_shard_215.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.44.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.44.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.44.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.45.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.45.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.45.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "8b6d92aa1f4065ee97a36717eaa14051"
},
{
"dataPath": "params_shard_216.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.45.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "dba137a9150d8c55e433e3f9e9f3eb9e"
},
{
"dataPath": "params_shard_217.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.46.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "ac2e15081a42527e7be1c2101b8bda37"
},
{
"dataPath": "params_shard_218.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.46.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "4e1f762a8d42c40f1f14587c6f6a3dae"
},
{
"dataPath": "params_shard_219.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.46.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "e39e392dbf164e60bcaae36e40fb88cc"
},
{
"dataPath": "params_shard_220.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.45.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.45.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.45.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.46.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.46.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.46.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "c9eb04ef4688c98c401ef8694b0cfac7"
},
{
"dataPath": "params_shard_221.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.46.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "3a0429b050d5a684557b5ceda801d368"
},
{
"dataPath": "params_shard_222.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.47.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "37d4053e28465b15285dafd92c614ce5"
},
{
"dataPath": "params_shard_223.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.47.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "ce7b92f08c11481b1c48ad5e30328adf"
},
{
"dataPath": "params_shard_224.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.47.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "90f23a4989070baa382c4f00452f3ae6"
},
{
"dataPath": "params_shard_225.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.46.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.46.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.46.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.47.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.47.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.47.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "a7a383f57edd1043efe922aff58a3511"
},
{
"dataPath": "params_shard_226.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.47.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "b32c806d61e9c98773795137ba661f58"
},
{
"dataPath": "params_shard_227.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.6.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "4bcf5fee33bca1c01df61baae15965c4"
},
{
"dataPath": "params_shard_228.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.6.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "0b5773ba6f4403a73e0e97cd5bcc97ca"
},
{
"dataPath": "params_shard_229.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.6.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "cec69728dee67ce16f80840dc1994e47"
},
{
"dataPath": "params_shard_230.bin",
"format": "raw-shard",
"nbytes": 24416256,
"records": [
{
"name": "model.layers.47.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.47.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.47.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18886656
},
{
"name": "model.layers.6.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18898944
},
{
"name": "model.layers.6.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21258240
},
{
"name": "model.layers.6.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24403968
}
],
"md5sum": "a051292576a8fc00a20c884549ff23a6"
},
{
"dataPath": "params_shard_231.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.6.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "5bd97984ebb108261b2d8981f5775f4d"
},
{
"dataPath": "params_shard_232.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.7.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "e8515fc212f5fb9326e9917d5aeadf7f"
},
{
"dataPath": "params_shard_233.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.7.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "23b8daedb91c8cf9953227c3fa0abeb6"
},
{
"dataPath": "params_shard_234.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.7.feed_forward.gate_up_proj.q_weight",
"shape": [
32768,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "8c465c9a9b5cd6d3127d54bb394425c1"
},
{
"dataPath": "params_shard_235.bin",
"format": "raw-shard",
"nbytes": 24403968,
"records": [
{
"name": "model.layers.6.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.6.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.6.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.7.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.7.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
},
{
"name": "model.layers.7.attention_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 24391680
}
],
"md5sum": "e233e3602a93e876e918b12cda350ff8"
},
{
"dataPath": "params_shard_236.bin",
"format": "raw-shard",
"nbytes": 50331648,
"records": [
{
"name": "model.layers.7.feed_forward.w2.q_weight",
"shape": [
6144,
2048
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 50331648,
"byteOffset": 0
}
],
"md5sum": "da608faf06dea26b5e238c7a3fa4b172"
},
{
"dataPath": "params_shard_237.bin",
"format": "raw-shard",
"nbytes": 18874368,
"records": [
{
"name": "model.layers.8.attention.wo.q_weight",
"shape": [
6144,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 18874368,
"byteOffset": 0
}
],
"md5sum": "a41338d85c921853598a7ea236bc2340"
},
{
"dataPath": "params_shard_238.bin",
"format": "raw-shard",
"nbytes": 25165824,
"records": [
{
"name": "model.layers.8.attention.wqkv.q_weight",
"shape": [
8192,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 25165824,
"byteOffset": 0
}
],
"md5sum": "bc1c6b272d262b186515042428a533e8"
},
{
"dataPath": "params_shard_239.bin",
"format": "raw-shard",
"nbytes": 284295168,
"records": [
{
"name": "output.q_weight",
"shape": [
92544,
768
],
"dtype": "uint32",
"format": "f32-to-bf16",
"nbytes": 284295168,
"byteOffset": 0
}
],
"md5sum": "97903f18a37886602bf0fb396e853bc9"
},
{
"dataPath": "params_shard_240.bin",
"format": "raw-shard",
"nbytes": 35536896,
"records": [
{
"name": "output.q_scale",
"shape": [
92544,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 35536896,
"byteOffset": 0
}
],
"md5sum": "1f1edc7d0bca4464d3e33796f650d039"
},
{
"dataPath": "params_shard_241.bin",
"format": "raw-shard",
"nbytes": 24391680,
"records": [
{
"name": "model.layers.7.feed_forward.gate_up_proj.q_scale",
"shape": [
32768,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12582912,
"byteOffset": 0
},
{
"name": "model.layers.7.feed_forward.w2.q_scale",
"shape": [
6144,
512
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 6291456,
"byteOffset": 12582912
},
{
"name": "model.layers.7.ffn_norm.weight",
"shape": [
6144
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 12288,
"byteOffset": 18874368
},
{
"name": "model.layers.8.attention.wo.q_scale",
"shape": [
6144,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 2359296,
"byteOffset": 18886656
},
{
"name": "model.layers.8.attention.wqkv.q_scale",
"shape": [
8192,
192
],
"dtype": "bfloat16",
"format": "raw",
"nbytes": 3145728,
"byteOffset": 21245952
}
],
"md5sum": "0bf2bc92da8dddfc1b427182b67dbf8e"
}
]
}