kechengcode's picture
Upload folder using huggingface_hub
a0df38a verified
{
"metadata": {
"ParamSize": 101,
"ParamBytes": 8705614848.0,
"BitsPerParam": 16.0
},
"records": [
{
"dataPath": "params_shard_0.bin",
"format": "raw-shard",
"nbytes": 1089994752,
"records": [
{
"name": "lm_head.weight",
"shape": [
152064,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1089994752,
"byteOffset": 0
}
],
"md5sum": "14ba0c2cb884467368ed3bd2c255170b"
},
{
"dataPath": "params_shard_1.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.10.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "b243720fa330a40c781d73598bb37311"
},
{
"dataPath": "params_shard_2.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.10.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "16ebdd0e55b7acfaca83243d5e9c1040"
},
{
"dataPath": "params_shard_3.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.10.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "3271d95707ec063482a601c950cc28fc"
},
{
"dataPath": "params_shard_4.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.11.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "9760646c6e45dd5a3c545151ca3f61c3"
},
{
"dataPath": "params_shard_5.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.11.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "634764c08b440390fcb395e399c9ec73"
},
{
"dataPath": "params_shard_6.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.11.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "ec5255945d6deb54edeeb3b61f7c2bbd"
},
{
"dataPath": "params_shard_7.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.11.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "73d9112c5255f6d8d6b60e8a87e1ac01"
},
{
"dataPath": "params_shard_8.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.12.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "b82847c0943bd1551f3b84eeab216ed4"
},
{
"dataPath": "params_shard_9.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.12.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "0eabb14b984e1c015c670b77cffbf6be"
},
{
"dataPath": "params_shard_10.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.12.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "c187460e8359d387bfedaf934ac0f646"
},
{
"dataPath": "params_shard_11.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.12.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "e66daf90d5e1ed9dc217825dd96e46e4"
},
{
"dataPath": "params_shard_12.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.13.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "540c0388ba638d23234f3dd385e26774"
},
{
"dataPath": "params_shard_13.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.13.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "c8e0523e2d976808d58b54f889d97ee8"
},
{
"dataPath": "params_shard_14.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.13.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "1b5eb84972102f37bc3ed9d89be5f459"
},
{
"dataPath": "params_shard_15.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.13.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "64fca7d68e54b4ea5b5429ea8ca99c8f"
},
{
"dataPath": "params_shard_16.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.8.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "60cda15a6f0386559aac0cd5e6d7ffe3"
},
{
"dataPath": "params_shard_17.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.8.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "8d77228e5da90942997d80366d11a132"
},
{
"dataPath": "params_shard_18.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.9.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "85f76c5c8b9640c520931ae853e9db07"
},
{
"dataPath": "params_shard_19.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.9.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "b9a69f8d68700d2a89e239bbb5cc8212"
},
{
"dataPath": "params_shard_20.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.9.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "b7573789ef93abe0b2a03e9638104f18"
},
{
"dataPath": "params_shard_21.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.9.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "29ba6f525741d01884a2c2455066ac4a"
},
{
"dataPath": "params_shard_22.bin",
"format": "raw-shard",
"nbytes": 1089994752,
"records": [
{
"name": "model.embed_tokens.weight",
"shape": [
152064,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 1089994752,
"byteOffset": 0
}
],
"md5sum": "eab7f1580f3d7da612180468e04e74e3"
},
{
"dataPath": "params_shard_23.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.0.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "8f255f7952726723f66cb23e9449ac27"
},
{
"dataPath": "params_shard_24.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.0.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "331bc7a2cd841d2622d6a2b658c91009"
},
{
"dataPath": "params_shard_25.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.0.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "2861e000bfe6d965d41f5e1092c50763"
},
{
"dataPath": "params_shard_26.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.0.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "25e256a5ccf25fa4a69528276ef3b6b0"
},
{
"dataPath": "params_shard_27.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.1.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "96ebc7cd4f13156ecc012b8a474dbdb2"
},
{
"dataPath": "params_shard_28.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.1.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "93c05c68a9c69b52b8767fbce584d23a"
},
{
"dataPath": "params_shard_29.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.1.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "01d8a77a773d9cb312e9a52a799bd5d9"
},
{
"dataPath": "params_shard_30.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.1.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "fc7997354679466205be888fefae0248"
},
{
"dataPath": "params_shard_31.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.2.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "6f77c8fe77e5f87583acc204ba9c03c5"
},
{
"dataPath": "params_shard_32.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.2.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "7f4d855ec114684c438d3ef22ba7b84f"
},
{
"dataPath": "params_shard_33.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.2.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "91f365dfff5ba01cdaaa8977626d041f"
},
{
"dataPath": "params_shard_34.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.2.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "5b8833e226def19b0cd97d7c3154f51c"
},
{
"dataPath": "params_shard_35.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.3.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "eb4c7d1fb5ff9387a71e2ae22c520b6f"
},
{
"dataPath": "params_shard_36.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.3.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "680be7e4577e5ccf7e265f7dff9a0eaa"
},
{
"dataPath": "params_shard_37.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.3.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "60119ba12e2ad9bdc042d90ccdb8ed87"
},
{
"dataPath": "params_shard_38.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.3.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "88053879f96079cfa67cae9ba14d8107"
},
{
"dataPath": "params_shard_39.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.4.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "7b730af45f53ba36b342f25dbb0c4583"
},
{
"dataPath": "params_shard_40.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.4.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "daf9b8be99b3df2601187126b5190a53"
},
{
"dataPath": "params_shard_41.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.4.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "1c574220ef3be8a9d9123bacdaa42f00"
},
{
"dataPath": "params_shard_42.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.4.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "8489341558d48ea7184b2040952534f4"
},
{
"dataPath": "params_shard_43.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.5.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "708b0427f5cdd2360430239c7b1b918e"
},
{
"dataPath": "params_shard_44.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.5.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "84afa5e9b366b694df927267e8464a17"
},
{
"dataPath": "params_shard_45.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.5.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "f3d3e51953f6d51ece79485f92678d95"
},
{
"dataPath": "params_shard_46.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.5.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "0eaff87011829467b39aa230b50e8367"
},
{
"dataPath": "params_shard_47.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.6.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "0cd87cb0de2bae8b12b182cd570a1676"
},
{
"dataPath": "params_shard_48.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.6.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "1282b720e05c09c678f0535db2b5b78f"
},
{
"dataPath": "params_shard_49.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.6.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "e2b71d9839700958718663722a8c1cea"
},
{
"dataPath": "params_shard_50.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.6.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "004bdcd6cd20a34c501db9399db1b59f"
},
{
"dataPath": "params_shard_51.bin",
"format": "raw-shard",
"nbytes": 135790592,
"records": [
{
"name": "model.layers.7.mlp.down_proj.weight",
"shape": [
3584,
18944
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 135790592,
"byteOffset": 0
}
],
"md5sum": "3acae7d1c65b7c673c76a8419aa5a37a"
},
{
"dataPath": "params_shard_52.bin",
"format": "raw-shard",
"nbytes": 271581184,
"records": [
{
"name": "model.layers.7.mlp.gate_up_proj.weight",
"shape": [
37888,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 271581184,
"byteOffset": 0
}
],
"md5sum": "c93d3489b9e01c1813b281a58b4b50d3"
},
{
"dataPath": "params_shard_53.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.7.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "e1952f2e90d397c6326a4ebfe4d87fe5"
},
{
"dataPath": "params_shard_54.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.7.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "239fc89d4a4f51ea8a1e0272fd1c0da3"
},
{
"dataPath": "params_shard_55.bin",
"format": "raw-shard",
"nbytes": 33030144,
"records": [
{
"name": "model.layers.8.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 0
}
],
"md5sum": "ac8a22fffc1b2b51697e7f38647dcda0"
},
{
"dataPath": "params_shard_56.bin",
"format": "raw-shard",
"nbytes": 25690112,
"records": [
{
"name": "model.layers.8.self_attn.o_proj.weight",
"shape": [
3584,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 25690112,
"byteOffset": 0
}
],
"md5sum": "c34e852cc01780eb2c8c92053433effa"
},
{
"dataPath": "params_shard_57.bin",
"format": "raw-shard",
"nbytes": 33367040,
"records": [
{
"name": "model.layers.10.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 0
},
{
"name": "model.layers.10.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 7168
},
{
"name": "model.layers.10.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 14336
},
{
"name": "model.layers.10.self_attn.c_attn.weight",
"shape": [
4608,
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 33030144,
"byteOffset": 23552
},
{
"name": "model.layers.11.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33053696
},
{
"name": "model.layers.11.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33060864
},
{
"name": "model.layers.11.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33068032
},
{
"name": "model.layers.12.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33077248
},
{
"name": "model.layers.12.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33084416
},
{
"name": "model.layers.12.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33091584
},
{
"name": "model.layers.13.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33100800
},
{
"name": "model.layers.13.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33107968
},
{
"name": "model.layers.13.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33115136
},
{
"name": "model.layers.8.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33124352
},
{
"name": "model.layers.8.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33131520
},
{
"name": "model.layers.9.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33138688
},
{
"name": "model.layers.9.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33145856
},
{
"name": "model.layers.9.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33153024
},
{
"name": "model.norm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33162240
},
{
"name": "model.layers.0.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33169408
},
{
"name": "model.layers.0.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33176576
},
{
"name": "model.layers.0.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33183744
},
{
"name": "model.layers.1.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33192960
},
{
"name": "model.layers.1.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33200128
},
{
"name": "model.layers.1.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33207296
},
{
"name": "model.layers.2.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33216512
},
{
"name": "model.layers.2.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33223680
},
{
"name": "model.layers.2.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33230848
},
{
"name": "model.layers.3.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33240064
},
{
"name": "model.layers.3.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33247232
},
{
"name": "model.layers.3.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33254400
},
{
"name": "model.layers.4.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33263616
},
{
"name": "model.layers.4.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33270784
},
{
"name": "model.layers.4.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33277952
},
{
"name": "model.layers.5.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33287168
},
{
"name": "model.layers.5.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33294336
},
{
"name": "model.layers.5.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33301504
},
{
"name": "model.layers.6.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33310720
},
{
"name": "model.layers.6.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33317888
},
{
"name": "model.layers.6.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33325056
},
{
"name": "model.layers.7.input_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33334272
},
{
"name": "model.layers.7.post_attention_layernorm.weight",
"shape": [
3584
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 7168,
"byteOffset": 33341440
},
{
"name": "model.layers.7.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33348608
},
{
"name": "model.layers.8.self_attn.c_attn.bias",
"shape": [
4608
],
"dtype": "float16",
"format": "f32-to-bf16",
"nbytes": 9216,
"byteOffset": 33357824
}
],
"md5sum": "e4c982148947f2c82d6965ad54885725"
}
]
}