CharlieFRuan's picture
Initial commit
5cc249e verified
{
"metadata": {
"ParamSize": 135,
"ParamBytes": 2200121344.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 131084288,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
32003,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131084288,
"byteOffset": 0
}
],
"md5sum": "c20cd1e65e8b9b8bcd6df7258ad692a5"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "67117d26e3683d3fb992ed64fb28a3ec"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "2b4cf50efe7b11e8d583d2cd7ec3f327"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "60d3e2175a0afeeea347dc304b764f28"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "e1ef9544115afa205802364e0809362c"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 31465472,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8388608
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31457280
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31461376
}
],
"md5sum": "365482b49516f8138fc472cf0a025055"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "1c833bec4281e38c4b29f06cce87c206"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "a869232a80be393b4d1081950705645a"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "a33240d89c7a60afeb67f2aaa85576bb"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "eadf1b8d358535035b5b8bd27aa6c718"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 31465472,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8388608
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31457280
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31461376
}
],
"md5sum": "d3170067741ce82bc338d9a567d562a5"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "8f7fbadbece4df84afc813140583d14a"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "d0e26dab25866fffe72002a9fd8721fb"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "8852fb1dad9f07ee115dcb2f1561f7cf"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "a277afa5a9784549dc94d508746f200f"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 31465472,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8388608
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31457280
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31461376
}
],
"md5sum": "bd00ed5dedbdb94ea8a4955138fbe1ef"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "722c27ca67ce6a5fab8b68cff677fcf3"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "82ae0fddd173d4292ab9bcd66c9aad09"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.6.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.7.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "a095fafc2739735f56a29e082277429e"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "670d905e29bd865b33b8c0f2a3b9f5d8"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 31465472,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8388608
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31457280
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31461376
}
],
"md5sum": "b7dc6a8421fc8110f95b9b5a8f384f6b"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "27fbd620e49f15b501fa1f5e80ba09bb"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "cf33ca730c2cce84b5a5fbb1e39b9774"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.8.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.9.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "92d1e9fe434432c5267791029ade35aa"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "bd6069b8a4686f97514179aec1dfe399"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 31465472,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8388608
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31457280
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31461376
}
],
"md5sum": "2e50dfd07b18ad6feb80f4f9dce231bb"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "2a0423481b60d2ada523fba34a617515"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "e1cb04d558fd26722b1ec82fe1512b75"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.10.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.11.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "c8b49ae0ab414d6e4db03a1b61c85c60"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "bcae72462b2002c4dbc81b77b179bc91"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 31465472,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8388608
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31457280
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31461376
}
],
"md5sum": "3230e6836eccb4fb3ca80088e0149561"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "6928b3404faeb5cb387b31dfc38a8f29"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "b4b49e428a50c53212c6fb9347338aeb"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.12.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.13.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "426ca978f11307c8655c9a2d5cafd686"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "0cbc3fe15f3386c29f02ebf34fdb6354"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 31465472,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8388608
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31457280
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31461376
}
],
"md5sum": "3c8f312537af7fa69e14aa491ef22f1f"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.14.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "deb610c78f144668041f767ea8c75d1c"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.14.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "e12c044268cd33825b56fc6da812c951"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.14.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.14.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.14.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.14.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.15.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "c278ac2bd7da8a24810792fbdeb699fb"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.15.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "bb2c8c0957896ce1d3dd1e15cb6e7528"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 31465472,
"records": [
{
"name": "model.layers.15.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.15.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8388608
},
{
"name": "model.layers.15.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31457280
},
{
"name": "model.layers.15.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31461376
}
],
"md5sum": "2f1facdd4dd8f3ed19cfb95c55f8b638"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.16.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "69ff403c5eb2b8d9d0a8295806eb556a"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.16.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "4a5757e5079bbe75f783be287cb47bd3"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.16.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.16.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.16.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.16.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.17.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "cc24c6b2cdcdf5c26a903d900568b52a"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.17.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "9277df931046d3d4d6c45eca5931b8a4"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 31465472,
"records": [
{
"name": "model.layers.17.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.17.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8388608
},
{
"name": "model.layers.17.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31457280
},
{
"name": "model.layers.17.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31461376
}
],
"md5sum": "19c27da9cbe222113814f105b68b6a8d"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.18.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "612904acb3f0c114b19ae2b544dad7a6"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.18.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "771730159052b4f05ff7a95c6bef476e"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.18.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.18.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.18.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.18.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.19.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "321458dc293a90f188ce10b86983fef0"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.19.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "2551863673051555d98d5062241c4e45"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 31465472,
"records": [
{
"name": "model.layers.19.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.19.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8388608
},
{
"name": "model.layers.19.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31457280
},
{
"name": "model.layers.19.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31461376
}
],
"md5sum": "ef5a2322b4cf204aaef2611d1ae7d3a6"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.20.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "ca686a2ddf2fb1e74a2968a0668966aa"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 23068672,
"records": [
{
"name": "model.layers.20.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 0
}
],
"md5sum": "66a1fe90dd5a3dcb8310478d5f9f52e3"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 29368320,
"records": [
{
"name": "model.layers.20.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 0
},
{
"name": "model.layers.20.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 10485760
},
{
"name": "model.layers.20.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18874368
},
{
"name": "model.layers.20.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 18878464
},
{
"name": "model.layers.21.self_attn.qkv_proj.weight",
"shape": [
2560,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 10485760,
"byteOffset": 18882560
}
],
"md5sum": "04d186cafe36c4ff46a123b74adf64ed"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 46137344,
"records": [
{
"name": "model.layers.21.mlp.gate_up_proj.weight",
"shape": [
11264,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 46137344,
"byteOffset": 0
}
],
"md5sum": "d70e0c24a80a281c60a590a4c51c4d20"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 131084288,
"records": [
{
"name": "lm_head.weight",
"shape": [
32003,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 131084288,
"byteOffset": 0
}
],
"md5sum": "2627d16359c1ba4ca975476b395114de"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 31469568,
"records": [
{
"name": "model.layers.21.self_attn.o_proj.weight",
"shape": [
2048,
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8388608,
"byteOffset": 0
},
{
"name": "model.layers.21.mlp.down_proj.weight",
"shape": [
2048,
5632
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 23068672,
"byteOffset": 8388608
},
{
"name": "model.layers.21.input_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31457280
},
{
"name": "model.layers.21.post_attention_layernorm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31461376
},
{
"name": "model.norm.weight",
"shape": [
2048
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 4096,
"byteOffset": 31465472
}
],
"md5sum": "ee8a5b28db683d67207b538efa571f54"
}
]
}