{ "metadata": { "ParamSize": 305, "ParamBytes": 4319821824.0, "BitsPerParam": 5.000976230824355 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 209715200, "records": [ { "name": "lm_head.q_weight", "shape": [ 102400, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 209715200, "byteOffset": 0 } ], "md5sum": "606880b7a8dc848647d0f4f96a4b89ea" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.22.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b6da9737170d1c0b645180a4de7eddcd" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8492c07d2f5760fbc9219bdd3b678b50" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29040640, "records": [ { "name": "lm_head.q_scale", "shape": [ 102400, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 26214400, "byteOffset": 0 }, { "name": "model.layers.22.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 26214400 }, { "name": "model.layers.22.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 26222592 } ], "md5sum": "a1e27e3b38f9d5dbefa1a60a33844a7b" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "882786af5e221402ac6a93cfbe558d18" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31014912, "records": [ { "name": "model.layers.22.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 0 }, { "name": "model.layers.22.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 5636096 }, { "name": "model.layers.23.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 5644288 }, { "name": "model.layers.23.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 5652480 }, { "name": "model.layers.23.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 28196864 } ], "md5sum": "4b2506de5078dd4f8676e493bd57dde5" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 30810112, "records": [ { "name": "model.layers.23.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 0 }, { "name": "model.layers.23.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 5636096 }, { "name": "model.layers.23.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 5644288 } ], "md5sum": "2016cd57aa151424f83b8a11866639b5" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.24.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "1676066fb9d039225b9db1a7025a435b" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.24.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ed1f5c885371df0971b2d2eb1290b5a8" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.24.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "45266bce7406124aa81a10a348fde366" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.23.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.23.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.23.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.24.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.24.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.24.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.24.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.24.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.24.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "a088038b26d0b22649806cc1d028fdf9" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.25.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "d67505ab892df80fa16122767fd50761" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "da71da4dbe03b54fe30fd3cd21d3e8f9" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.24.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.25.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.25.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.25.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.25.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.25.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "f49e54b4bb128fc91b502800f3fe2e5c" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.26.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "52bb43a590afc5a108144c97e1545541" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.26.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "895d44bf500994768e69cc4acb0fe917" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.26.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "5e2ccbcac2ce04eac264d6a85fa38ba6" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.25.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.25.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.25.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.26.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.26.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.26.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.26.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.26.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.26.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "bad478a89ded29503b7ad5247596bab1" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.27.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "6c78425185d2ed4231feb8fa7d0ac144" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ccbde98336037080f6518ea492561a6f" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.26.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.27.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.27.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.27.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.27.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.27.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "845793104fed7cbf95ee1534a2d21659" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.28.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "20d73c91a3d72233d81982ce621a72c3" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.28.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "97661861b9330740351d75485aabdce9" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.28.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b4f341001044c445a5366ba03e97dbd1" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.27.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.27.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.27.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.28.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.28.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.28.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.28.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.28.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.28.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "1982a151e5efe66f797dffdcb665e8bd" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.29.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "fa27b88003a19faf081a3c9565bd1b77" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "52865cb1714cf5630b995a5583ea2df0" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.28.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.29.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.29.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.29.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.29.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.29.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "20b2f49b7fa06db00725aa97c436b952" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 209715200, "records": [ { "name": "model.embed_tokens.q_weight", "shape": [ 102400, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 209715200, "byteOffset": 0 } ], "md5sum": "cb7f2f3c841f0ee8657397755d0dbdd3" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 26214400, "records": [ { "name": "model.embed_tokens.q_scale", "shape": [ 102400, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 26214400, "byteOffset": 0 } ], "md5sum": "247d530128a3acc07316362bf878ef5b" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.0.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "538bfee51b0cd51ad8c57cef968c1b81" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "8de562a563fd2c319e18751f54a9a258" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1fa5bff53cc592bb798d1075ead4c32b" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 32595968, "records": [ { "name": "model.layers.29.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.29.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.29.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12591104 }, { "name": "model.layers.0.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12599296 }, { "name": "model.layers.0.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15417344 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21053440 }, { "name": "model.layers.0.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21061632 }, { "name": "model.layers.0.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24207360 } ], "md5sum": "e5647ccc8edffb18918b73b175d14772" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "61802bfcda32eb42d51fae14f9d8d6fb" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f2fc8896ab3e23fc577de0d2a5daf98f" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.0.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.1.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.1.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.1.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "4c543de50f9e1ad35153c1ba5a66e058" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.10.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "b9818960ffc4bd9a71205e649bf97e89" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "9b8747e93fbccd762a88ad4e768a8f40" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "df63e726070f154a4fd009e4a252587b" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.1.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.1.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.10.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.10.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.10.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.10.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "18052ca573905c87811a83b199e457e2" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "f005300edf1ab989f6e9e36de6bdda70" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "840c3be3ac1b54b4af55ca53b2a8a002" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.10.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.11.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.11.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.11.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "0388837eef43c12f07f59b200704206d" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.12.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "6ab677658eb4afa9234655d6261cfdba" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "3033c3ef797fc34862a4f5d1113748ab" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a75291043a554d9f9c32475111cfbe14" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.11.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.11.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.12.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.12.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.12.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.12.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "57fda20ee30ec1e11afa4858a8f9bc0d" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5754cc13d26ce5bd7cd6bbc429dcc37d" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "ee3e18ecc01bbd548beb64d76bc2fd1d" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.12.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.13.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.13.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.13.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "8ef26c4416be17c6da3b37f59eb43a4e" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.14.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "325ab20541b280b55ff2037492d905ed" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "2074661de6b0194cd2f24cef5a3edc7f" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "1873dd4562e9ed7c493e77effac3ad80" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.13.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.13.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.14.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.14.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.14.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.14.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "a522d3059f4f9305f7f3b82a0f8474c2" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "bfc566d2e359a21811584069fdb6dfd6" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "4b56610368dd9a1efa9179a2e6b8fea1" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.14.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.15.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.15.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.15.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "46d6389203559e24c81a891ce53f13d2" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.16.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "4e30fe4386b207489f8af6b9e44c1381" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "df0953c545bebe4c332c05b9e87918ee" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "a49caeb6e52d4f009a05a386d275b189" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.15.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.15.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.15.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.16.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.16.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.16.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.16.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "7c22a6609bd775f754302364776bac3b" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "96ed8151fb856a871150dfb828ce66ca" }, { "dataPath": "params_shard_63.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "8f3cbfc6d5255e7ae1c468b71149e870" }, { "dataPath": "params_shard_64.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.16.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.17.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.17.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.17.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "c7bc07ccbcef367393c389e32b0f237d" }, { "dataPath": "params_shard_65.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.18.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "0806c61004ab81f16f2a0bba1264d6ba" }, { "dataPath": "params_shard_66.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "bcf10dd970bf297fc8442125fdc6c12b" }, { "dataPath": "params_shard_67.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "81e693dffdc97f72728d8cc0c0bfb90b" }, { "dataPath": "params_shard_68.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.17.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.17.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.17.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.18.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.18.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.18.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.18.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "cc6681044aeb3c1339b59890e1b39206" }, { "dataPath": "params_shard_69.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "ef9dce04f7a42cdb15b7f3332f3cca46" }, { "dataPath": "params_shard_70.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e2deca87c61d92102516851c4fe9bf68" }, { "dataPath": "params_shard_71.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.18.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.19.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.19.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.19.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "a1a4c25aaca07739b86d44a661b0f199" }, { "dataPath": "params_shard_72.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.2.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "87b45b61bc6024979fced00e910e4ace" }, { "dataPath": "params_shard_73.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "632bf70a29517427aae921f5f6f46fbc" }, { "dataPath": "params_shard_74.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "02d5b26cb847690b7d0b3d775d276279" }, { "dataPath": "params_shard_75.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.19.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.19.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.19.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.2.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.2.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.2.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.2.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "bc8ddd14b1ff9303d6393d7c979d2e53" }, { "dataPath": "params_shard_76.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "4d8c8a3584c50927613034b8f8cea42d" }, { "dataPath": "params_shard_77.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "6bc8e84046d80beab429a1cf7c6edfc4" }, { "dataPath": "params_shard_78.bin", "format": "raw-shard", "nbytes": 32063488, "records": [ { "name": "model.layers.2.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 1048576 }, { "name": "model.layers.20.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 1056768 }, { "name": "model.layers.20.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 23601152 }, { "name": "model.layers.20.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 26419200 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 32055296 } ], "md5sum": "4b23d8cfa463acb0553955d5347eb0ea" }, { "dataPath": "params_shard_79.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.21.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "e25872cd1502cc268068612bf3e810a6" }, { "dataPath": "params_shard_80.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "cb772f1c323ca7f4ada0327f7b8f0151" }, { "dataPath": "params_shard_81.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.21.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "51ee6a441d70c5438780aa43057a3423" }, { "dataPath": "params_shard_82.bin", "format": "raw-shard", "nbytes": 32587776, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 3145728 }, { "name": "model.layers.20.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 11534336 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 12582912 }, { "name": "model.layers.21.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 12591104 }, { "name": "model.layers.21.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 15409152 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.21.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 21053440 }, { "name": "model.layers.21.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 24199168 } ], "md5sum": "cc223878af98d797a73cb50991a85d65" }, { "dataPath": "params_shard_83.bin", "format": "raw-shard", "nbytes": 29360128, "records": [ { "name": "model.layers.21.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 1048576 }, { "name": "model.layers.22.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 26214400 } ], "md5sum": "027462be1b2294837b184dd763415a34" }, { "dataPath": "params_shard_84.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.22.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.22.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.3.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "5a8dad096acda11755fd1043218f08b0" }, { "dataPath": "params_shard_85.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "206813d374e06f6bb22562b704ee8827" }, { "dataPath": "params_shard_86.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "e5865516f88c52b7e7e5c3f90cb1eef6" }, { "dataPath": "params_shard_87.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.4.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "24e60c84726af450b6c80164d70858da" }, { "dataPath": "params_shard_88.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "48a911cd98a3967ffba6773f7f2f2ed8" }, { "dataPath": "params_shard_89.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d7cfb15ba54a4117678cca83caedd157" }, { "dataPath": "params_shard_90.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.3.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.3.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.3.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.3.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.3.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.4.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.4.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.4.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "e68f7cb78dec00e6ca96693eed60d900" }, { "dataPath": "params_shard_91.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.4.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.5.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "133da966c46812b369e7c3881543c5fc" }, { "dataPath": "params_shard_92.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "73a90e1bc10515347f4cbdd8085ed61e" }, { "dataPath": "params_shard_93.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "fc4a8187566cafcda9190d00f1bf5946" }, { "dataPath": "params_shard_94.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.6.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "790fe54ef8c522f5eaaba8b6ba8073eb" }, { "dataPath": "params_shard_95.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "a57f0e2b9da6daebd9899f604beaae85" }, { "dataPath": "params_shard_96.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "b6279ae29c6ade77a8621f8caffa756c" }, { "dataPath": "params_shard_97.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.5.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.5.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.5.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.5.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.5.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.6.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.6.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.6.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "1ff9166596f291d0a0d455f320801ccb" }, { "dataPath": "params_shard_98.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.6.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.7.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "9e572a685c864ab28a96e6b093f3b280" }, { "dataPath": "params_shard_99.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "5798be8e470dae8d8b6d2913fd518fa0" }, { "dataPath": "params_shard_100.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "33149eac79b8edc244cf49321a5ad60d" }, { "dataPath": "params_shard_101.bin", "format": "raw-shard", "nbytes": 22544384, "records": [ { "name": "model.layers.8.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 0 } ], "md5sum": "5e3ed348453f0e7739f1a0ee0a723ab1" }, { "dataPath": "params_shard_102.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "1e5657f2e0aa96e009efef532227ae0e" }, { "dataPath": "params_shard_103.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "f617da8d732289a198f2b8d2e46f2d88" }, { "dataPath": "params_shard_104.bin", "format": "raw-shard", "nbytes": 32661504, "records": [ { "name": "model.layers.7.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.7.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.7.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.7.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.7.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 21045248 }, { "name": "model.layers.8.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 21053440 }, { "name": "model.layers.8.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 23871488 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 29507584 }, { "name": "model.layers.8.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 29515776 } ], "md5sum": "0e813d19b44087e77c8f7994edf91632" }, { "dataPath": "params_shard_105.bin", "format": "raw-shard", "nbytes": 31989760, "records": [ { "name": "model.layers.8.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 8388608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 9437184 }, { "name": "model.layers.9.mlp.down_proj.q_weight", "shape": [ 4096, 1376 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 22544384, "byteOffset": 9445376 } ], "md5sum": "03da8c7e5307360c72ec479b33fd170c" }, { "dataPath": "params_shard_106.bin", "format": "raw-shard", "nbytes": 45088768, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.q_weight", "shape": [ 22016, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 45088768, "byteOffset": 0 } ], "md5sum": "2baa643cd4b5af6ce825f412ceb5e171" }, { "dataPath": "params_shard_107.bin", "format": "raw-shard", "nbytes": 25165824, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.q_weight", "shape": [ 12288, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 25165824, "byteOffset": 0 } ], "md5sum": "d279f693f9548f33dfdb6896de219857" }, { "dataPath": "params_shard_108.bin", "format": "raw-shard", "nbytes": 21045248, "records": [ { "name": "model.layers.9.mlp.down_proj.q_scale", "shape": [ 4096, 344 ], "dtype": "bfloat16", "format": "raw", "nbytes": 2818048, "byteOffset": 0 }, { "name": "model.layers.9.mlp.gate_up_proj.q_scale", "shape": [ 22016, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 5636096, "byteOffset": 2818048 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "bfloat16", "format": "raw", "nbytes": 8192, "byteOffset": 8454144 }, { "name": "model.layers.9.self_attn.qkv_proj.q_scale", "shape": [ 12288, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 3145728, "byteOffset": 8462336 }, { "name": "model.layers.9.self_attn.o_proj.q_weight", "shape": [ 4096, 512 ], "dtype": "uint32", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 11608064 }, { "name": "model.layers.9.self_attn.o_proj.q_scale", "shape": [ 4096, 128 ], "dtype": "bfloat16", "format": "raw", "nbytes": 1048576, "byteOffset": 19996672 } ], "md5sum": "15ce253834790194fc711813c6711d45" } ] }