{ "metadata": { "ParamSize": 93, "ParamBytes": 8644714496.0, "BitsPerParam": 16.0 }, "records": [ { "dataPath": "params_shard_0.bin", "format": "raw-shard", "nbytes": 1050673152, "records": [ { "name": "lm_head.weight", "shape": [ 128256, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1050673152, "byteOffset": 0 } ], "md5sum": "86011fc8d6d280ac1805509c0d17de66" }, { "dataPath": "params_shard_1.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.10.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f79bfb3233473b999f28c56331ad915a" }, { "dataPath": "params_shard_2.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.10.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "f14ca0af5959e02f152e3ad93b013084" }, { "dataPath": "params_shard_3.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.10.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0236ca78b420fc9eaa6088d81d668dd7" }, { "dataPath": "params_shard_4.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.10.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "ea73f3ffeaba1745ad83655deadeca04" }, { "dataPath": "params_shard_5.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.11.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "1047dd811d238cbbe095dc92f50bfe18" }, { "dataPath": "params_shard_6.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.11.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9c0c5239b3f651fa98e265a4560aadb4" }, { "dataPath": "params_shard_7.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.11.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "ff88bdc45a8af96bd461074b11af25a0" }, { "dataPath": "params_shard_8.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.11.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "36545c44019aff7f7a1c048a750db858" }, { "dataPath": "params_shard_9.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.12.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "770522c2bd329cadfefc95b46b5f61c2" }, { "dataPath": "params_shard_10.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.12.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e14c1084fe3161a3cb638b9558ff44c0" }, { "dataPath": "params_shard_11.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.12.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "15205d46f0832355f97abbf70d25392b" }, { "dataPath": "params_shard_12.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.12.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "64c792ec9093b90ea524e7ca9d66e88c" }, { "dataPath": "params_shard_13.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.13.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "224583992660fbe33f5b1edc7679f26a" }, { "dataPath": "params_shard_14.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.13.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "b5005524be8f65a7ea75d26f91266c3c" }, { "dataPath": "params_shard_15.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.13.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "8ebc9dceb176f3a4a0fa48a864282915" }, { "dataPath": "params_shard_16.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.13.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e5b5397835c2cfa9a988d2c3242a4214" }, { "dataPath": "params_shard_17.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.14.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "57929e3c93c19bdb6d41142202b5d606" }, { "dataPath": "params_shard_18.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.14.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "4139f840b840b41bfb374955fbdc85f8" }, { "dataPath": "params_shard_19.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.14.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "a96c3f05d23eb6c1ac882faebea4bc16" }, { "dataPath": "params_shard_20.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.14.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "d64f181bfabb1aaae45d36bf6a541644" }, { "dataPath": "params_shard_21.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.9.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "9199dd4c779a1b9ae9ddccc5e7a3ec41" }, { "dataPath": "params_shard_22.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.9.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "e200800802d314abfcf32056ebf0a79f" }, { "dataPath": "params_shard_23.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.9.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "0dad768991391b52751313c54773674c" }, { "dataPath": "params_shard_24.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.9.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "0dcdae9b06e2c7a52045b8608f6d9a52" }, { "dataPath": "params_shard_25.bin", "format": "raw-shard", "nbytes": 1050673152, "records": [ { "name": "model.embed_tokens.weight", "shape": [ 128256, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 1050673152, "byteOffset": 0 } ], "md5sum": "6928f563b4a8b16250f0effe48d9bcf5" }, { "dataPath": "params_shard_26.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.0.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "942ed4a2a7a7b4f48bd685e2f5e07bc3" }, { "dataPath": "params_shard_27.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.0.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7121ed254b5c3edd8d121312dadbd784" }, { "dataPath": "params_shard_28.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.0.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2305a6f45b854305933aa42520b84edd" }, { "dataPath": "params_shard_29.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.0.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "f4d6700c18d5fbd61e230b6e69df0b33" }, { "dataPath": "params_shard_30.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.1.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6a0a90436e42dfa3d4b3c6099988aa24" }, { "dataPath": "params_shard_31.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.1.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "63d70cf82f6012b86e235038033468c3" }, { "dataPath": "params_shard_32.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.1.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "2bf3af3b5bb6026ef2fb915419a0770e" }, { "dataPath": "params_shard_33.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.1.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e23dab75bbfda9e4bcd58b816f37db0c" }, { "dataPath": "params_shard_34.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.2.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "80aa51d9c09bed1e30b4e3b152413705" }, { "dataPath": "params_shard_35.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.2.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "aa15b431ed384bbf2f6ad78c5396a53e" }, { "dataPath": "params_shard_36.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.2.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "e6d758eb8db2210fd4b6f8b1b86d8ab2" }, { "dataPath": "params_shard_37.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.2.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "9a442fd3fcc4ab1b41c398a19bf69577" }, { "dataPath": "params_shard_38.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.3.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "92fec42fe1bd462bc06483eabb12b85b" }, { "dataPath": "params_shard_39.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.3.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "59df764364a1d675dc1a2b6e7c0bbaf2" }, { "dataPath": "params_shard_40.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.3.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "5e9efa89d002489a38b90418a6920208" }, { "dataPath": "params_shard_41.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.3.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "e5f5e67cce4fb2d1282498f5174fc61b" }, { "dataPath": "params_shard_42.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.4.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "a3f91f10d5f1ca43e4abc087f1054d04" }, { "dataPath": "params_shard_43.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.4.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "af92b196f341133d04147e81caaa0c9a" }, { "dataPath": "params_shard_44.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.4.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "3dc3873f71c2b702784c155544e03a5f" }, { "dataPath": "params_shard_45.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.4.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "365b3f92bce3f1e82f92dfae0e40735e" }, { "dataPath": "params_shard_46.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.5.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "f362eec3a598f037131faa09906b5ac3" }, { "dataPath": "params_shard_47.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.5.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "a3d5d386f47253c86f8a55b7aa0e758d" }, { "dataPath": "params_shard_48.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.5.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "deaf1b90e0be86a02b0cf9e8a14ec7a5" }, { "dataPath": "params_shard_49.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.5.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "5f48b1ac5a7344db855b7d478a851c7e" }, { "dataPath": "params_shard_50.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.6.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "8fc8d07de6f753c6ccb515934686b7f3" }, { "dataPath": "params_shard_51.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.6.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "9e3f00e6ad7d4d249d46a8d9efd043a0" }, { "dataPath": "params_shard_52.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.6.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "68bbb0eccc4cd3e3296daf6e511295d0" }, { "dataPath": "params_shard_53.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.6.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "8e336da34882ebdfc6d845816feef2b7" }, { "dataPath": "params_shard_54.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.7.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "e0f3e1e8f296b175e21ae538089913a4" }, { "dataPath": "params_shard_55.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.7.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "56f9506158811691a794a0aaca552b08" }, { "dataPath": "params_shard_56.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.7.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "71f017181d1b305c213746e770d7bd80" }, { "dataPath": "params_shard_57.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.7.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "6bb4aa2445219780517787cb1fc79275" }, { "dataPath": "params_shard_58.bin", "format": "raw-shard", "nbytes": 117440512, "records": [ { "name": "model.layers.8.mlp.down_proj.weight", "shape": [ 4096, 14336 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 117440512, "byteOffset": 0 } ], "md5sum": "6408db051aaf584f12097843039a9642" }, { "dataPath": "params_shard_59.bin", "format": "raw-shard", "nbytes": 234881024, "records": [ { "name": "model.layers.8.mlp.gate_up_proj.weight", "shape": [ 28672, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 234881024, "byteOffset": 0 } ], "md5sum": "7367c444cb98789129325929efe691f8" }, { "dataPath": "params_shard_60.bin", "format": "raw-shard", "nbytes": 50331648, "records": [ { "name": "model.layers.8.self_attn.qkv_proj.weight", "shape": [ 6144, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 50331648, "byteOffset": 0 } ], "md5sum": "acaaa89aa3b04ca49164fd8545ab9369" }, { "dataPath": "params_shard_61.bin", "format": "raw-shard", "nbytes": 33554432, "records": [ { "name": "model.layers.8.self_attn.o_proj.weight", "shape": [ 4096, 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 33554432, "byteOffset": 0 } ], "md5sum": "24a709ec7c3313ab9c57bb0d1f02043a" }, { "dataPath": "params_shard_62.bin", "format": "raw-shard", "nbytes": 253952, "records": [ { "name": "model.layers.10.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 0 }, { "name": "model.layers.10.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 8192 }, { "name": "model.layers.11.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 16384 }, { "name": "model.layers.11.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 24576 }, { "name": "model.layers.12.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 32768 }, { "name": "model.layers.12.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 40960 }, { "name": "model.layers.13.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 49152 }, { "name": "model.layers.13.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 57344 }, { "name": "model.layers.14.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 65536 }, { "name": "model.layers.14.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 73728 }, { "name": "model.layers.9.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 81920 }, { "name": "model.layers.9.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 90112 }, { "name": "model.norm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 98304 }, { "name": "model.layers.0.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 106496 }, { "name": "model.layers.0.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 114688 }, { "name": "model.layers.1.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 122880 }, { "name": "model.layers.1.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 131072 }, { "name": "model.layers.2.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 139264 }, { "name": "model.layers.2.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 147456 }, { "name": "model.layers.3.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 155648 }, { "name": "model.layers.3.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 163840 }, { "name": "model.layers.4.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 172032 }, { "name": "model.layers.4.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 180224 }, { "name": "model.layers.5.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 188416 }, { "name": "model.layers.5.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 196608 }, { "name": "model.layers.6.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 204800 }, { "name": "model.layers.6.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 212992 }, { "name": "model.layers.7.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 221184 }, { "name": "model.layers.7.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 229376 }, { "name": "model.layers.8.input_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 237568 }, { "name": "model.layers.8.post_attention_layernorm.weight", "shape": [ 4096 ], "dtype": "float16", "format": "f32-to-bf16", "nbytes": 8192, "byteOffset": 245760 } ], "md5sum": "f4709a0c1b375f977fc08c1b20616202" } ] }