{ "metadata": { "ParamSize": 39, "ParamBytes": 2952896512.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 32000, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "03a65d61de7a52c70907ebf3fa9758aa" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "d12e6a357b596ce92179d4ea4b19ab72" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5fa24d0ee40a80c8a62a7bf90dfaa8f5" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "f88725746b4a861b10f490577197c64e" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "052c48f1fb0bdd535ecf92fe344164c0" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "86963edeb8ff7b9f21c95725a2a0d92c" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "942a2dc4de7d4f5f2b8146b9c411b25f" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "3aec97e2be4c4551e98c249eee9a90b9" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "0680dc1b0beff0cd8e3fb2b7189b65c3" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "51e3c754050f45feb09a64ff92d67992" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d68c5af73f2968c99e967f69ab1cc997" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "981fa5ab1c0cf50b47e8d26947039d4b" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "779e5926260fa5d3f2beedab6b9d3c38" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "105ee9f3e172f4052971b7cdef5a8bcc" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "556c5286ca315c99172e4232aedd4515" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "dbbf47ddc8ebcfc1f81a8346c0da5283" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "dcc685b20ab52daaa91da66f35b9ce23" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "dbfa8c7d780a8d2632e08cc797a752aa" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "2484304417644cdb2e2d109654a1f736" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "015be42b6f9599103a6851f5422cebbd" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "13392f17a75ff7d6aa15755f3485efc7" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 100663296, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 12288, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 100663296, "byteOffset": 0 } ], "md5sum": "88d89ea3b05485eb29a84866b45bc4c4" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "7a4fc355a3c058bd876b3e001ea67bec" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 180355072, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 22016, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 180355072, "byteOffset": 0 } ], "md5sum": "b4850fd6ab6c472e174fa3285188138a" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 90177536, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 4096, 11008 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 90177536, "byteOffset": 0 } ], "md5sum": "c941087506333ff04625f661e89e9cd0" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 262144000, "records": [ { "name": "lm_head.weight", "shape": [ 32000, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 262144000, "byteOffset": 0 } ], "md5sum": "a314468d893019244bd0da57381b7e91" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 106496, "records": [ { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16384 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24576 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32768 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 40960 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 49152 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 57344 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 65536 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 73728 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 81920 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 90112 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 98304 } ], "md5sum": "029997b0662e92accb22f44c8a832172" } ] }