illian01's picture
Add weights and config
d89e317
{
"metadata": {
"ParamSize": 39,
"ParamBytes": 2952896512.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 262144000,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
32000,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144000,
"byteOffset": 0
}
],
"md5sum": "03a65d61de7a52c70907ebf3fa9758aa"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.0.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "d12e6a357b596ce92179d4ea4b19ab72"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "5fa24d0ee40a80c8a62a7bf90dfaa8f5"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "f88725746b4a861b10f490577197c64e"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "052c48f1fb0bdd535ecf92fe344164c0"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.1.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "86963edeb8ff7b9f21c95725a2a0d92c"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "942a2dc4de7d4f5f2b8146b9c411b25f"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "3aec97e2be4c4551e98c249eee9a90b9"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "0680dc1b0beff0cd8e3fb2b7189b65c3"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.2.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "51e3c754050f45feb09a64ff92d67992"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "d68c5af73f2968c99e967f69ab1cc997"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "981fa5ab1c0cf50b47e8d26947039d4b"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "779e5926260fa5d3f2beedab6b9d3c38"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.3.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "105ee9f3e172f4052971b7cdef5a8bcc"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "556c5286ca315c99172e4232aedd4515"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "dbbf47ddc8ebcfc1f81a8346c0da5283"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "dcc685b20ab52daaa91da66f35b9ce23"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.4.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "dbfa8c7d780a8d2632e08cc797a752aa"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "2484304417644cdb2e2d109654a1f736"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "015be42b6f9599103a6851f5422cebbd"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "13392f17a75ff7d6aa15755f3485efc7"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 100663296,
"records": [
{
"name": "model.layers.5.self_attn.qkv_proj.weight",
"shape": [
12288,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 100663296,
"byteOffset": 0
}
],
"md5sum": "88d89ea3b05485eb29a84866b45bc4c4"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 33554432,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
4096,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33554432,
"byteOffset": 0
}
],
"md5sum": "7a4fc355a3c058bd876b3e001ea67bec"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 180355072,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
22016,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 180355072,
"byteOffset": 0
}
],
"md5sum": "b4850fd6ab6c472e174fa3285188138a"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 90177536,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
4096,
11008
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 90177536,
"byteOffset": 0
}
],
"md5sum": "c941087506333ff04625f661e89e9cd0"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 262144000,
"records": [
{
"name": "lm_head.weight",
"shape": [
32000,
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 262144000,
"byteOffset": 0
}
],
"md5sum": "a314468d893019244bd0da57381b7e91"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 106496,
"records": [
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 0
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 8192
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 16384
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 24576
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 32768
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 40960
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 49152
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 57344
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 65536
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 73728
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 81920
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 90112
},
{
"name": "model.norm.weight",
"shape": [
4096
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 8192,
"byteOffset": 98304
}
],
"md5sum": "029997b0662e92accb22f44c8a832172"
}
]
}