|
{ |
|
"metadata": { |
|
"ParamSize": 199, |
|
"ParamBytes": 15231233024.0, |
|
"BitsPerParam": 16.0 |
|
}, |
|
"records": [ |
|
{ |
|
"dataPath": "params_shard_0.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1089994752, |
|
"records": [ |
|
{ |
|
"name": "model.embed_tokens.weight", |
|
"shape": [ |
|
152064, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1089994752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "639fff4937ff77096dba20d893a4bf9b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_1.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cbbc6a07cebe4053a22d356c4d5fa1d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_2.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8ce8537a836c55d41b2ca2ab682a631f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_3.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "500afea2b54ff3aed8ec28a3163c32e0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_4.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "901e57dd1f688086d386b0ab8755b8e6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_5.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2514ed8e7b2cfd6fcbedac96de4bf374" |
|
}, |
|
{ |
|
"dataPath": "params_shard_6.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "53a13d30b33932f09bddf0ed9c191c2e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_7.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.1.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "facd01cc80570a2d1cea0df749702026" |
|
}, |
|
{ |
|
"dataPath": "params_shard_8.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d49f32dbdb4944dd5851f829da7a454b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_9.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4129dbed4ddca89918df795edd58840c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_10.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "51891862a1695b8532066c3119e7bf5c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_11.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.2.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "632ee44d78fccb210b0a52ff1e9f6f16" |
|
}, |
|
{ |
|
"dataPath": "params_shard_12.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dfc05b5bdaf84a836c836d11ded0e9d6" |
|
}, |
|
{ |
|
"dataPath": "params_shard_13.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6e8baaa2d9c138e8f65d6ed07742d301" |
|
}, |
|
{ |
|
"dataPath": "params_shard_14.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1ee60ab16f88999fffbb900d0c1e5216" |
|
}, |
|
{ |
|
"dataPath": "params_shard_15.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.3.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dbbbfd7e6b6e2833933bcb21ea83014f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_16.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "975c051bf200ce8ab3edffb4ef49b08c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_17.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "306f1df9d2bb44c0d1313c4e83e8596a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_18.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6f163952301a89a82c8dad50f4466a6e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_19.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.4.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dc5f395ed48d3f0a1ce06a2a62071b8b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_20.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "003ca818a8613760f8241198e08b3f58" |
|
}, |
|
{ |
|
"dataPath": "params_shard_21.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3f4c241cf0a39241b6c6607d76cc12f4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_22.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d7ee3bff79f87e7562f2844e716e4d6a" |
|
}, |
|
{ |
|
"dataPath": "params_shard_23.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.5.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8c11499c8d6504291103c0da619c2a19" |
|
}, |
|
{ |
|
"dataPath": "params_shard_24.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "aa423bef77f5cdd74e4c500742c133fa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_25.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ad7ee0c3c0f73e4e259c636d09543ed9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_26.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4a44837f2bfe45ae33073326e1ff61b2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_27.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.6.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "68677e982dc93866e7cbd21d266ec94f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_28.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4ecc280eb83ff0de0a852086068c53a2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_29.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d8d33410d4539bc6ee3185ec4c4a334c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_30.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5415b2dc3ffb018ac27ecf6795c4f0b0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_31.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.7.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b2e5e7b943ea7b6672e6652c887b9b87" |
|
}, |
|
{ |
|
"dataPath": "params_shard_32.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e5e3d44904f73b31e4126b77ec113907" |
|
}, |
|
{ |
|
"dataPath": "params_shard_33.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "166f2162eb376dcddca5d44ec0ac9724" |
|
}, |
|
{ |
|
"dataPath": "params_shard_34.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f517caef320499773309b5a75a8cd199" |
|
}, |
|
{ |
|
"dataPath": "params_shard_35.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.8.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "34003c300c554c7344938032cf66ba4f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_36.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ac44dd64b2395242573470bd1b9d9e6c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_37.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9b58ba9f6d7a5a092ddb1f0a2c39e071" |
|
}, |
|
{ |
|
"dataPath": "params_shard_38.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0690e06ff94648eafd457bf5d92f2670" |
|
}, |
|
{ |
|
"dataPath": "params_shard_39.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.9.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0127e25c351cdedfbe656ccaf6a83a27" |
|
}, |
|
{ |
|
"dataPath": "params_shard_40.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ec4b0dcdf85093234a621fc83555a7dc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_41.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9f9682646413c4562adf1d97826c504b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_42.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d894ba2c65bde16671a4576e7ac55639" |
|
}, |
|
{ |
|
"dataPath": "params_shard_43.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.10.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "641cc564a47f51a68f88526c06380129" |
|
}, |
|
{ |
|
"dataPath": "params_shard_44.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2fcb4589d486bc58ee0d7e23abf26f59" |
|
}, |
|
{ |
|
"dataPath": "params_shard_45.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "def9199a1d65256e29e41f619270d6d4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_46.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "65b0cea3f58cd025dfa16447cad536a8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_47.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.11.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "bcf0603cedae747bb3d7cebd0dd10962" |
|
}, |
|
{ |
|
"dataPath": "params_shard_48.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "3fbf6e45f97c1857086bee2c7d7344c7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_49.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1c65fff32dadb4179b91c5f0b7df0a73" |
|
}, |
|
{ |
|
"dataPath": "params_shard_50.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9090a2e507fcd8443d50a2abcc5db2e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_51.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.12.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d9a0f1c33663fd621b9cc0d5b5e3581b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_52.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "674c8a6c400c5c0965d0fd3d7e4ffb7d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_53.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d8683b1042e946b9adf20f596ec05a18" |
|
}, |
|
{ |
|
"dataPath": "params_shard_54.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8522342e5100c8da96e23864c38f756d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_55.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.13.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9e94e16c673e929a934e6ce7bd16ac91" |
|
}, |
|
{ |
|
"dataPath": "params_shard_56.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c15a10e7029aed04bdfeff412ec23772" |
|
}, |
|
{ |
|
"dataPath": "params_shard_57.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "278958e258496a675cfef7c6b92ec7b8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_58.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "32f08900bab3f36694e8a67f227a8afe" |
|
}, |
|
{ |
|
"dataPath": "params_shard_59.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.14.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d554bfc29419fdd1e0453bd247b13355" |
|
}, |
|
{ |
|
"dataPath": "params_shard_60.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "279cc0ddbd6b43a7b29e966a1ea2d508" |
|
}, |
|
{ |
|
"dataPath": "params_shard_61.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "69893637742e12a5fc3998782c239a96" |
|
}, |
|
{ |
|
"dataPath": "params_shard_62.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "dcab5135408d4ab101a75e8193a35faa" |
|
}, |
|
{ |
|
"dataPath": "params_shard_63.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.15.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9bc9ff2455bcf2698e9bbe82ecd9273d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_64.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5c4149839d67ed8212f52873d9b98e26" |
|
}, |
|
{ |
|
"dataPath": "params_shard_65.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8d643dcbd61467d10c4aac022bc836ab" |
|
}, |
|
{ |
|
"dataPath": "params_shard_66.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1fe025843dea4fd28e6530f9e46a8768" |
|
}, |
|
{ |
|
"dataPath": "params_shard_67.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.16.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4c5b4d99df112bd7819a52eceddb23e4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_68.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "697142268ab00f87b65bcfe3e0a10a8c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_69.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1b2885373bff9b30c2ee95f8873cddc4" |
|
}, |
|
{ |
|
"dataPath": "params_shard_70.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "67ba2ada3a32f0751bb8646add2c7dcc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_71.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.17.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "46268f19b9d760b3bfe4625c964f2034" |
|
}, |
|
{ |
|
"dataPath": "params_shard_72.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "653ca9f793f48c5e783799cb8e494359" |
|
}, |
|
{ |
|
"dataPath": "params_shard_73.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5bd1e1654ca162931fc646eb65e5dfe8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_74.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c19b337596b79398aa8244f58ebed28f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_75.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.18.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2e46f2327818a33aec50a8211ee49ad8" |
|
}, |
|
{ |
|
"dataPath": "params_shard_76.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "746af0f8230fe218f445dcac573c66f9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_77.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c7ff6b2b9038b29f484b4f22f136fce9" |
|
}, |
|
{ |
|
"dataPath": "params_shard_78.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "0ce3135aa4710d86160ebb80169069e7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_79.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.19.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ecdefcd50ee31da3cba7d243ab1e57a5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_80.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "63c94ade83ad684b4f33ca88848785b7" |
|
}, |
|
{ |
|
"dataPath": "params_shard_81.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "9080adb31749a9479b0d8eeea75acb50" |
|
}, |
|
{ |
|
"dataPath": "params_shard_82.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "33ce641c752ee9cac3b6c75b8585db06" |
|
}, |
|
{ |
|
"dataPath": "params_shard_83.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.20.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "2656e2df638894138023e862eb713712" |
|
}, |
|
{ |
|
"dataPath": "params_shard_84.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d4cc67d3b41fcbe375b1fc7fea0b2afb" |
|
}, |
|
{ |
|
"dataPath": "params_shard_85.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "efc2890658d5ae818e8869e58a671b27" |
|
}, |
|
{ |
|
"dataPath": "params_shard_86.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "e6f99774817c150d29d58d6be67cc92e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_87.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.21.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "fb0b1b2cd7b7c2d861a6f213fe55ea34" |
|
}, |
|
{ |
|
"dataPath": "params_shard_88.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33548288, |
|
"records": [ |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.0.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 9216 |
|
}, |
|
{ |
|
"name": "model.layers.0.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33039360 |
|
}, |
|
{ |
|
"name": "model.layers.0.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33046528 |
|
}, |
|
{ |
|
"name": "model.layers.1.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33053696 |
|
}, |
|
{ |
|
"name": "model.layers.1.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33062912 |
|
}, |
|
{ |
|
"name": "model.layers.1.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33070080 |
|
}, |
|
{ |
|
"name": "model.layers.2.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33077248 |
|
}, |
|
{ |
|
"name": "model.layers.2.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33086464 |
|
}, |
|
{ |
|
"name": "model.layers.2.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33093632 |
|
}, |
|
{ |
|
"name": "model.layers.3.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33100800 |
|
}, |
|
{ |
|
"name": "model.layers.3.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33110016 |
|
}, |
|
{ |
|
"name": "model.layers.3.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33117184 |
|
}, |
|
{ |
|
"name": "model.layers.4.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33124352 |
|
}, |
|
{ |
|
"name": "model.layers.4.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33133568 |
|
}, |
|
{ |
|
"name": "model.layers.4.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33140736 |
|
}, |
|
{ |
|
"name": "model.layers.5.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33147904 |
|
}, |
|
{ |
|
"name": "model.layers.5.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33157120 |
|
}, |
|
{ |
|
"name": "model.layers.5.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33164288 |
|
}, |
|
{ |
|
"name": "model.layers.6.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33171456 |
|
}, |
|
{ |
|
"name": "model.layers.6.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33180672 |
|
}, |
|
{ |
|
"name": "model.layers.6.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33187840 |
|
}, |
|
{ |
|
"name": "model.layers.7.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33195008 |
|
}, |
|
{ |
|
"name": "model.layers.7.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33204224 |
|
}, |
|
{ |
|
"name": "model.layers.7.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33211392 |
|
}, |
|
{ |
|
"name": "model.layers.8.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33218560 |
|
}, |
|
{ |
|
"name": "model.layers.8.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33227776 |
|
}, |
|
{ |
|
"name": "model.layers.8.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33234944 |
|
}, |
|
{ |
|
"name": "model.layers.9.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33242112 |
|
}, |
|
{ |
|
"name": "model.layers.9.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33251328 |
|
}, |
|
{ |
|
"name": "model.layers.9.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33258496 |
|
}, |
|
{ |
|
"name": "model.layers.10.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33265664 |
|
}, |
|
{ |
|
"name": "model.layers.10.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33274880 |
|
}, |
|
{ |
|
"name": "model.layers.10.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33282048 |
|
}, |
|
{ |
|
"name": "model.layers.11.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33289216 |
|
}, |
|
{ |
|
"name": "model.layers.11.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33298432 |
|
}, |
|
{ |
|
"name": "model.layers.11.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33305600 |
|
}, |
|
{ |
|
"name": "model.layers.12.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33312768 |
|
}, |
|
{ |
|
"name": "model.layers.12.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33321984 |
|
}, |
|
{ |
|
"name": "model.layers.12.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33329152 |
|
}, |
|
{ |
|
"name": "model.layers.13.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33336320 |
|
}, |
|
{ |
|
"name": "model.layers.13.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33345536 |
|
}, |
|
{ |
|
"name": "model.layers.13.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33352704 |
|
}, |
|
{ |
|
"name": "model.layers.14.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33359872 |
|
}, |
|
{ |
|
"name": "model.layers.14.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33369088 |
|
}, |
|
{ |
|
"name": "model.layers.14.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33376256 |
|
}, |
|
{ |
|
"name": "model.layers.15.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33383424 |
|
}, |
|
{ |
|
"name": "model.layers.15.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33392640 |
|
}, |
|
{ |
|
"name": "model.layers.15.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33399808 |
|
}, |
|
{ |
|
"name": "model.layers.16.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33406976 |
|
}, |
|
{ |
|
"name": "model.layers.16.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33416192 |
|
}, |
|
{ |
|
"name": "model.layers.16.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33423360 |
|
}, |
|
{ |
|
"name": "model.layers.17.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33430528 |
|
}, |
|
{ |
|
"name": "model.layers.17.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33439744 |
|
}, |
|
{ |
|
"name": "model.layers.17.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33446912 |
|
}, |
|
{ |
|
"name": "model.layers.18.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33454080 |
|
}, |
|
{ |
|
"name": "model.layers.18.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33463296 |
|
}, |
|
{ |
|
"name": "model.layers.18.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33470464 |
|
}, |
|
{ |
|
"name": "model.layers.19.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33477632 |
|
}, |
|
{ |
|
"name": "model.layers.19.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33486848 |
|
}, |
|
{ |
|
"name": "model.layers.19.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33494016 |
|
}, |
|
{ |
|
"name": "model.layers.20.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33501184 |
|
}, |
|
{ |
|
"name": "model.layers.20.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33510400 |
|
}, |
|
{ |
|
"name": "model.layers.20.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33517568 |
|
}, |
|
{ |
|
"name": "model.layers.21.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33524736 |
|
}, |
|
{ |
|
"name": "model.layers.21.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33533952 |
|
}, |
|
{ |
|
"name": "model.layers.21.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33541120 |
|
} |
|
], |
|
"md5sum": "4e42cd8492b5461758305584714d23bc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_89.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "12e7d842f41e13e97d019d53ee31dda5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_90.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "442976c32d6a3a081ac3fce9d4f704bd" |
|
}, |
|
{ |
|
"dataPath": "params_shard_91.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "a9dae54e79cfafbdfa055245f23e96a2" |
|
}, |
|
{ |
|
"dataPath": "params_shard_92.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6c8ae6f045a5270f17bf9ba3b761e27f" |
|
}, |
|
{ |
|
"dataPath": "params_shard_93.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "7554a14ef679d53dcaf1d5a4088b31fc" |
|
}, |
|
{ |
|
"dataPath": "params_shard_94.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "79ffbbece0d1ce43214dee99afe3b59b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_95.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.23.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "cf8da193517e57d8b3b253b5b75aa107" |
|
}, |
|
{ |
|
"dataPath": "params_shard_96.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "b027228819ab9843e4996fdce16c5989" |
|
}, |
|
{ |
|
"dataPath": "params_shard_97.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "4472ae0c4b7dec286e8ad205dcc3bd9b" |
|
}, |
|
{ |
|
"dataPath": "params_shard_98.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "1281c293ad83754abf903a934f453671" |
|
}, |
|
{ |
|
"dataPath": "params_shard_99.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.24.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "54431184f25dda0ba9c83f5cf9d7b963" |
|
}, |
|
{ |
|
"dataPath": "params_shard_100.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "f1981ecc6e302ec1dab92b94fbf7931e" |
|
}, |
|
{ |
|
"dataPath": "params_shard_101.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "10c0ab6b8bac2275345f70fc659faca0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_102.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "42cc19b1097ae8558fc007d12a9bd308" |
|
}, |
|
{ |
|
"dataPath": "params_shard_103.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.25.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "8d57dc558c660a70a72cbfeb45fa6eaf" |
|
}, |
|
{ |
|
"dataPath": "params_shard_104.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "6d0e29bb06b89450ce0e16ded49c8675" |
|
}, |
|
{ |
|
"dataPath": "params_shard_105.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "d14c19b54df724fa84dcf4f00a68d635" |
|
}, |
|
{ |
|
"dataPath": "params_shard_106.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c538a31fd20ca4a188b54920e01030be" |
|
}, |
|
{ |
|
"dataPath": "params_shard_107.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.26.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "52144e63d33fbb83b71f07531ded1ef5" |
|
}, |
|
{ |
|
"dataPath": "params_shard_108.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33030144, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "5cdeeea2a99992e6142760e7ca300d3c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_109.bin", |
|
"format": "raw-shard", |
|
"nbytes": 25690112, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.self_attn.o_proj.weight", |
|
"shape": [ |
|
3584, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 25690112, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "254dd6a25ae531ba545ad98e058cf98d" |
|
}, |
|
{ |
|
"dataPath": "params_shard_110.bin", |
|
"format": "raw-shard", |
|
"nbytes": 271581184, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.gate_up_proj.weight", |
|
"shape": [ |
|
37888, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 271581184, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "c53d95dc685275a16be35d4abcfb9697" |
|
}, |
|
{ |
|
"dataPath": "params_shard_111.bin", |
|
"format": "raw-shard", |
|
"nbytes": 135790592, |
|
"records": [ |
|
{ |
|
"name": "model.layers.27.mlp.down_proj.weight", |
|
"shape": [ |
|
3584, |
|
18944 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 135790592, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "ce938d992cb98ce810294aa27272fff0" |
|
}, |
|
{ |
|
"dataPath": "params_shard_112.bin", |
|
"format": "raw-shard", |
|
"nbytes": 1089994752, |
|
"records": [ |
|
{ |
|
"name": "lm_head.weight", |
|
"shape": [ |
|
152064, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 1089994752, |
|
"byteOffset": 0 |
|
} |
|
], |
|
"md5sum": "688822edc27946d29b00439e00e58b2c" |
|
}, |
|
{ |
|
"dataPath": "params_shard_113.bin", |
|
"format": "raw-shard", |
|
"nbytes": 33178624, |
|
"records": [ |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 0 |
|
}, |
|
{ |
|
"name": "model.layers.22.self_attn.c_attn.weight", |
|
"shape": [ |
|
4608, |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 33030144, |
|
"byteOffset": 9216 |
|
}, |
|
{ |
|
"name": "model.layers.22.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33039360 |
|
}, |
|
{ |
|
"name": "model.layers.22.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33046528 |
|
}, |
|
{ |
|
"name": "model.layers.23.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33053696 |
|
}, |
|
{ |
|
"name": "model.layers.23.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33062912 |
|
}, |
|
{ |
|
"name": "model.layers.23.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33070080 |
|
}, |
|
{ |
|
"name": "model.layers.24.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33077248 |
|
}, |
|
{ |
|
"name": "model.layers.24.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33086464 |
|
}, |
|
{ |
|
"name": "model.layers.24.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33093632 |
|
}, |
|
{ |
|
"name": "model.layers.25.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33100800 |
|
}, |
|
{ |
|
"name": "model.layers.25.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33110016 |
|
}, |
|
{ |
|
"name": "model.layers.25.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33117184 |
|
}, |
|
{ |
|
"name": "model.layers.26.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33124352 |
|
}, |
|
{ |
|
"name": "model.layers.26.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33133568 |
|
}, |
|
{ |
|
"name": "model.layers.26.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33140736 |
|
}, |
|
{ |
|
"name": "model.layers.27.self_attn.c_attn.bias", |
|
"shape": [ |
|
4608 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 9216, |
|
"byteOffset": 33147904 |
|
}, |
|
{ |
|
"name": "model.layers.27.input_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33157120 |
|
}, |
|
{ |
|
"name": "model.layers.27.post_attention_layernorm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33164288 |
|
}, |
|
{ |
|
"name": "model.norm.weight", |
|
"shape": [ |
|
3584 |
|
], |
|
"dtype": "float16", |
|
"format": "f32-to-bf16", |
|
"nbytes": 7168, |
|
"byteOffset": 33171456 |
|
} |
|
], |
|
"md5sum": "e49dc083a5d8f6a62e9ef5447bb42cc6" |
|
} |
|
] |
|
} |