{ "metadata": { "ParamSize": 135, "ParamBytes": 2200121344.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 131084288, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 32003, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 131084288, "byteOffset": 0 } ], "md5sum": "c20cd1e65e8b9b8bcd6df7258ad692a5" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "67117d26e3683d3fb992ed64fb28a3ec" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "2b4cf50efe7b11e8d583d2cd7ec3f327" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "60d3e2175a0afeeea347dc304b764f28" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "e1ef9544115afa205802364e0809362c" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "365482b49516f8138fc472cf0a025055" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "1c833bec4281e38c4b29f06cce87c206" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "a869232a80be393b4d1081950705645a" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "a33240d89c7a60afeb67f2aaa85576bb" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "eadf1b8d358535035b5b8bd27aa6c718" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "d3170067741ce82bc338d9a567d562a5" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "8f7fbadbece4df84afc813140583d14a" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "d0e26dab25866fffe72002a9fd8721fb" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "8852fb1dad9f07ee115dcb2f1561f7cf" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "a277afa5a9784549dc94d508746f200f" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "bd00ed5dedbdb94ea8a4955138fbe1ef" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "722c27ca67ce6a5fab8b68cff677fcf3" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "82ae0fddd173d4292ab9bcd66c9aad09" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "a095fafc2739735f56a29e082277429e" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "670d905e29bd865b33b8c0f2a3b9f5d8" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "b7dc6a8421fc8110f95b9b5a8f384f6b" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "27fbd620e49f15b501fa1f5e80ba09bb" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "cf33ca730c2cce84b5a5fbb1e39b9774" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "92d1e9fe434432c5267791029ade35aa" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "bd6069b8a4686f97514179aec1dfe399" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "2e50dfd07b18ad6feb80f4f9dce231bb" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "2a0423481b60d2ada523fba34a617515" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "e1cb04d558fd26722b1ec82fe1512b75" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.10.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "c8b49ae0ab414d6e4db03a1b61c85c60" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "bcae72462b2002c4dbc81b77b179bc91" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "3230e6836eccb4fb3ca80088e0149561" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "6928b3404faeb5cb387b31dfc38a8f29" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "b4b49e428a50c53212c6fb9347338aeb" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "426ca978f11307c8655c9a2d5cafd686" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "0cbc3fe15f3386c29f02ebf34fdb6354" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "3c8f312537af7fa69e14aa491ef22f1f" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "deb610c78f144668041f767ea8c75d1c" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "e12c044268cd33825b56fc6da812c951" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.15.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "c278ac2bd7da8a24810792fbdeb699fb" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.15.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "bb2c8c0957896ce1d3dd1e15cb6e7528" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.15.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.15.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.15.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.15.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "2f1facdd4dd8f3ed19cfb95c55f8b638" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.16.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "69ff403c5eb2b8d9d0a8295806eb556a" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.16.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "4a5757e5079bbe75f783be287cb47bd3" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.16.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.16.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.16.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.16.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.17.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "cc24c6b2cdcdf5c26a903d900568b52a" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.17.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "9277df931046d3d4d6c45eca5931b8a4" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.17.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.17.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.17.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.17.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "19c27da9cbe222113814f105b68b6a8d" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.18.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "612904acb3f0c114b19ae2b544dad7a6" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.18.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "771730159052b4f05ff7a95c6bef476e" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.18.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.18.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.18.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.18.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.19.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "321458dc293a90f188ce10b86983fef0" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.19.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "2551863673051555d98d5062241c4e45" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 31465472, "records": [ { "name": "model.layers.19.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.19.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.19.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.19.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 } ], "md5sum": "ef5a2322b4cf204aaef2611d1ae7d3a6" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.20.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "ca686a2ddf2fb1e74a2968a0668966aa" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 23068672, "records": [ { "name": "model.layers.20.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 0 } ], "md5sum": "66a1fe90dd5a3dcb8310478d5f9f52e3" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 29368320, "records": [ { "name": "model.layers.20.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 0 }, { "name": "model.layers.20.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 10485760 }, { "name": "model.layers.20.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18874368 }, { "name": "model.layers.20.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 18878464 }, { "name": "model.layers.21.self_attn.qkv_proj.weight", "shape": [ 2560, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 10485760, "byteOffset": 18882560 } ], "md5sum": "04d186cafe36c4ff46a123b74adf64ed" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 46137344, "records": [ { "name": "model.layers.21.mlp.gate_up_proj.weight", "shape": [ 11264, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 46137344, "byteOffset": 0 } ], "md5sum": "d70e0c24a80a281c60a590a4c51c4d20" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 131084288, "records": [ { "name": "lm_head.weight", "shape": [ 32003, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 131084288, "byteOffset": 0 } ], "md5sum": "2627d16359c1ba4ca975476b395114de" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 31469568, "records": [ { "name": "model.layers.21.self_attn.o_proj.weight", "shape": [ 2048, 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8388608, "byteOffset": 0 }, { "name": "model.layers.21.mlp.down_proj.weight", "shape": [ 2048, 5632 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 23068672, "byteOffset": 8388608 }, { "name": "model.layers.21.input_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31457280 }, { "name": "model.layers.21.post_attention_layernorm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31461376 }, { "name": "model.norm.weight", "shape": [ 2048 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 4096, "byteOffset": 31465472 } ], "md5sum": "ee8a5b28db683d67207b538efa571f54" } ] }