diff --git a/ndarray-cache.json b/ndarray-cache.json new file mode 100644 index 0000000000000000000000000000000000000000..0cc9f3f2f8164131f4a2213fcd3492ab29eebd84 --- /dev/null +++ b/ndarray-cache.json @@ -0,0 +1,2214 @@ +{ + "metadata": { + "ParamSize": 146, + "ParamBytes": 3422752768.0, + "BitsPerParam": 16.0 + }, + "records": [ + { + "dataPath": "params_shard_0.bin", + "format": "raw-shard", + "nbytes": 201326592, + "records": [ + { + "name": "model.embed_tokens.weight", + "shape": [ + 49152, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 201326592, + "byteOffset": 0 + } + ], + "md5sum": "63e24f3fa1deb0776ef33616eaee78f3" + }, + { + "dataPath": "params_shard_1.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.0.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e211bf37e1b601f1dd81fd1f8cfb1046" + }, + { + "dataPath": "params_shard_2.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.0.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "9d7d8fd443f5a0181c4f0e5f4dacd392" + }, + { + "dataPath": "params_shard_3.bin", + "format": "raw-shard", + "nbytes": 25174016, + "records": [ + { + "name": "model.layers.0.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 0 + }, + { + "name": "model.layers.0.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 4096 + }, + { + "name": "model.layers.0.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 8192 + } + ], + "md5sum": "5419bcbffefce4c784e01d6b4bd84b5a" + }, + { + "dataPath": "params_shard_4.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.1.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2a6d04e731f5583463bec1dd287b6c79" + }, + { + "dataPath": "params_shard_5.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.1.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "aaf16f66d9da461c0ec3a0d440dcaa95" + }, + { + "dataPath": "params_shard_6.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.1.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "433c4efa299dafc77f8ae672a1d5ecfe" + }, + { + "dataPath": "params_shard_7.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.10.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "081111316087b149ca28459451c139cf" + }, + { + "dataPath": "params_shard_8.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.10.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "5fed2104f91d1b386ae9fd8bfb0f6cfc" + }, + { + "dataPath": "params_shard_9.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.10.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "ed444f9f0b0f7e90f71c946826d93fce" + }, + { + "dataPath": "params_shard_10.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.11.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "2c327b7d20777e661c7bebb583fe3865" + }, + { + "dataPath": "params_shard_11.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.11.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "89b30a16ffaf450f312bc9ce975c7e3a" + }, + { + "dataPath": "params_shard_12.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.11.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9baec45937680648095d44d6c1dc64dd" + }, + { + "dataPath": "params_shard_13.bin", + "format": "raw-shard", + "nbytes": 25190400, + "records": [ + { + "name": "model.layers.0.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.1.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.1.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.1.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "model.layers.10.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16785408 + }, + { + "name": "model.layers.10.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16789504 + }, + { + "name": "model.layers.10.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16793600 + }, + { + "name": "model.layers.11.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25182208 + }, + { + "name": "model.layers.11.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25186304 + } + ], + "md5sum": "9373f459e917bd948179ad571157b2a0" + }, + { + "dataPath": "params_shard_14.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.12.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "aeecc1eea90d5def12b95e8408bec3bf" + }, + { + "dataPath": "params_shard_15.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.12.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c8631f9276e90d4d3d0143dab1778bc9" + }, + { + "dataPath": "params_shard_16.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.12.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c792ad248ad7dfb9b4c628bedff8fc56" + }, + { + "dataPath": "params_shard_17.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.13.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "26d63092d06968873941d31915a3efac" + }, + { + "dataPath": "params_shard_18.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.13.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c8724388350455c553532e845d7be88a" + }, + { + "dataPath": "params_shard_19.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.13.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d642a9f3dde1666fc8348e5963804c8e" + }, + { + "dataPath": "params_shard_20.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.14.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "507fd2514d4f941d9e51ce9e7c581471" + }, + { + "dataPath": "params_shard_21.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.14.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "6d2c904cc9e48db7bba9a61b1d81156b" + }, + { + "dataPath": "params_shard_22.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.14.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "8ec2181b0f2ed7068547992a0c24469e" + }, + { + "dataPath": "params_shard_23.bin", + "format": "raw-shard", + "nbytes": 25190400, + "records": [ + { + "name": "model.layers.11.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.12.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.12.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.12.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "model.layers.13.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16785408 + }, + { + "name": "model.layers.13.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16789504 + }, + { + "name": "model.layers.13.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16793600 + }, + { + "name": "model.layers.14.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25182208 + }, + { + "name": "model.layers.14.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25186304 + } + ], + "md5sum": "4c1141809d483d29b0da833d8f2db40f" + }, + { + "dataPath": "params_shard_24.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.15.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e78ed6f222ad4c775c76666b4e0d28c1" + }, + { + "dataPath": "params_shard_25.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.15.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "6008e9bb5e433e49f0c46a1e00f949f1" + }, + { + "dataPath": "params_shard_26.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.15.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "7fb13f3321f95836c99963ccb6e3e19f" + }, + { + "dataPath": "params_shard_27.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.16.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d350a63de179a9593451aa63254c1140" + }, + { + "dataPath": "params_shard_28.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.16.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "5d37f6aca48a1932199ac62ce536f3d2" + }, + { + "dataPath": "params_shard_29.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.16.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "28fd4440dc6a682175f192b8e255d741" + }, + { + "dataPath": "params_shard_30.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.17.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b12e6086fc7901e1f987eb79b00560a5" + }, + { + "dataPath": "params_shard_31.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.17.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "05c47a6024b4ffbd3cd6ccda36d70fe8" + }, + { + "dataPath": "params_shard_32.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.17.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "d46a8a2a0b43c4b0d8f03a29c6a64648" + }, + { + "dataPath": "params_shard_33.bin", + "format": "raw-shard", + "nbytes": 25190400, + "records": [ + { + "name": "model.layers.14.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.15.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.15.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.15.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "model.layers.16.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16785408 + }, + { + "name": "model.layers.16.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16789504 + }, + { + "name": "model.layers.16.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16793600 + }, + { + "name": "model.layers.17.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25182208 + }, + { + "name": "model.layers.17.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25186304 + } + ], + "md5sum": "4983623fc70a0daa1585ca5f408e608c" + }, + { + "dataPath": "params_shard_34.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.18.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "561a6f1e18c20f3ab18efb919d44fbf8" + }, + { + "dataPath": "params_shard_35.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.18.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "75988d362d9c4b8c3f74953418a02350" + }, + { + "dataPath": "params_shard_36.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.18.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f8d5f073f6cbbfa81fc0721226301f3c" + }, + { + "dataPath": "params_shard_37.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.19.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "e78fc57e4498af73e8da01b270fde893" + }, + { + "dataPath": "params_shard_38.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.19.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "63cb871d089e9f52769103fee54056cf" + }, + { + "dataPath": "params_shard_39.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.19.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "52dd6cac14a4302857b12c644952ab93" + }, + { + "dataPath": "params_shard_40.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.2.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "972ce42b68db8e9ccf1b45200518ab18" + }, + { + "dataPath": "params_shard_41.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.2.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "fbe9aa1f72f7baf209a31a1f7ad0374e" + }, + { + "dataPath": "params_shard_42.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.2.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "1dd2764245fb1bb532381d0dd1f1ed35" + }, + { + "dataPath": "params_shard_43.bin", + "format": "raw-shard", + "nbytes": 25190400, + "records": [ + { + "name": "model.layers.17.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.18.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.18.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.18.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "model.layers.19.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16785408 + }, + { + "name": "model.layers.19.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16789504 + }, + { + "name": "model.layers.19.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16793600 + }, + { + "name": "model.layers.2.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25182208 + }, + { + "name": "model.layers.2.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25186304 + } + ], + "md5sum": "3361dac972f2132a361a2ce83bebd681" + }, + { + "dataPath": "params_shard_44.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.20.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d6a7928c3785fb285cd98a4e15272691" + }, + { + "dataPath": "params_shard_45.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.20.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "77fd4e5550f169ad91140c7a6131350a" + }, + { + "dataPath": "params_shard_46.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.20.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "94b9eae72d05359e35083721236aeec6" + }, + { + "dataPath": "params_shard_47.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.21.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "6843a1fc893fac059ddd744b40809c3c" + }, + { + "dataPath": "params_shard_48.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.21.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "1d8b5aac35c525a3368bf67909ea26d8" + }, + { + "dataPath": "params_shard_49.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.21.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "2e0117784a298020c10e7bc4aa0d6da7" + }, + { + "dataPath": "params_shard_50.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.22.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d6ac4ac0ca4259eb1f75e584527a75a4" + }, + { + "dataPath": "params_shard_51.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.22.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c0d2266f86e0cc5f3d4dac1323f964dc" + }, + { + "dataPath": "params_shard_52.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.22.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "e0416f94f0465473db705247782cff76" + }, + { + "dataPath": "params_shard_53.bin", + "format": "raw-shard", + "nbytes": 25190400, + "records": [ + { + "name": "model.layers.2.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.20.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.20.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.20.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "model.layers.21.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16785408 + }, + { + "name": "model.layers.21.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16789504 + }, + { + "name": "model.layers.21.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16793600 + }, + { + "name": "model.layers.22.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25182208 + }, + { + "name": "model.layers.22.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25186304 + } + ], + "md5sum": "c0ae11be1523f20b89cc48f9e22437f4" + }, + { + "dataPath": "params_shard_54.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.23.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "c32526674eed04f63cc7c6990c3ed7a9" + }, + { + "dataPath": "params_shard_55.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.23.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c2673ec782374720d1682a4c4025d90e" + }, + { + "dataPath": "params_shard_56.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.23.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "66501649850ab508edb0466a36b4c351" + }, + { + "dataPath": "params_shard_57.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.3.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "a76f83bdf370c8744e1cb789f8bc39e7" + }, + { + "dataPath": "params_shard_58.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.3.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "7c3d7a79975a54f0b8c3e9620758f5b1" + }, + { + "dataPath": "params_shard_59.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.3.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "95ed1e9c48c9c4ee135b000b41abdbad" + }, + { + "dataPath": "params_shard_60.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.4.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "ea1f685a286db392a7dc50a3c030ae34" + }, + { + "dataPath": "params_shard_61.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.4.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "0b2046111a6056cb49876231b1166129" + }, + { + "dataPath": "params_shard_62.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.4.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "124a4df85bed9d60dd738baa254f89d9" + }, + { + "dataPath": "params_shard_63.bin", + "format": "raw-shard", + "nbytes": 25190400, + "records": [ + { + "name": "model.layers.22.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.23.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.23.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.23.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "model.layers.3.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16785408 + }, + { + "name": "model.layers.3.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16789504 + }, + { + "name": "model.layers.3.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16793600 + }, + { + "name": "model.layers.4.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25182208 + }, + { + "name": "model.layers.4.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25186304 + } + ], + "md5sum": "3072c76422cf6a56f92e9afa361e71aa" + }, + { + "dataPath": "params_shard_64.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.5.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d311f8be4ff7fd41eaa9fb04f4945d51" + }, + { + "dataPath": "params_shard_65.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.5.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "8733b73f741fb49160767e2d31df4342" + }, + { + "dataPath": "params_shard_66.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.5.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "9ecc01ecff3d28f205ba13a4a5debc39" + }, + { + "dataPath": "params_shard_67.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.6.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "d75560dfe8bbdd6af5bceefa5d4df7b9" + }, + { + "dataPath": "params_shard_68.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.6.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c6452a6344c7c693610e3b6a72fc3926" + }, + { + "dataPath": "params_shard_69.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.6.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "bfaf6f3a3065bed92932fb6acdd13bf4" + }, + { + "dataPath": "params_shard_70.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.7.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "36c88170a84b6e1432dc3a0c1d63a10a" + }, + { + "dataPath": "params_shard_71.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.7.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "c0ececbde4381baa29d09fb79f6e6225" + }, + { + "dataPath": "params_shard_72.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.7.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "f55ee0af6ee9efb82cfbfc2031fc363f" + }, + { + "dataPath": "params_shard_73.bin", + "format": "raw-shard", + "nbytes": 25190400, + "records": [ + { + "name": "model.layers.4.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.5.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.5.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.5.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "model.layers.6.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16785408 + }, + { + "name": "model.layers.6.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16789504 + }, + { + "name": "model.layers.6.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16793600 + }, + { + "name": "model.layers.7.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25182208 + }, + { + "name": "model.layers.7.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25186304 + } + ], + "md5sum": "e597704e90744ab7a8275d0d4ece58a1" + }, + { + "dataPath": "params_shard_74.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.8.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "f49573dde258dc5e4760929a31e25b3a" + }, + { + "dataPath": "params_shard_75.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.8.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "7908070d6658100a6f8189cd3c394f4a" + }, + { + "dataPath": "params_shard_76.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.8.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "c4244772f938629230546a5ece038198" + }, + { + "dataPath": "params_shard_77.bin", + "format": "raw-shard", + "nbytes": 33554432, + "records": [ + { + "name": "model.layers.9.mlp.down_proj.weight", + "shape": [ + 2048, + 8192 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 33554432, + "byteOffset": 0 + } + ], + "md5sum": "b77b5916653faa1357b1d6202c476cc9" + }, + { + "dataPath": "params_shard_78.bin", + "format": "raw-shard", + "nbytes": 67108864, + "records": [ + { + "name": "model.layers.9.mlp.gate_up_proj.weight", + "shape": [ + 16384, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 67108864, + "byteOffset": 0 + } + ], + "md5sum": "ef75962dea1fe9ae4be0c06e00f59976" + }, + { + "dataPath": "params_shard_79.bin", + "format": "raw-shard", + "nbytes": 25165824, + "records": [ + { + "name": "model.layers.9.self_attn.qkv_proj.weight", + "shape": [ + 6144, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 25165824, + "byteOffset": 0 + } + ], + "md5sum": "24b21af2f10e3b6d167765d578ce54e4" + }, + { + "dataPath": "params_shard_80.bin", + "format": "raw-shard", + "nbytes": 25186304, + "records": [ + { + "name": "model.layers.7.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 0 + }, + { + "name": "model.layers.8.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8388608 + }, + { + "name": "model.layers.8.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 8392704 + }, + { + "name": "model.layers.8.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 8396800 + }, + { + "name": "model.layers.9.input_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16785408 + }, + { + "name": "model.layers.9.post_attention_layernorm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 16789504 + }, + { + "name": "model.layers.9.self_attn.o_proj.weight", + "shape": [ + 2048, + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 8388608, + "byteOffset": 16793600 + }, + { + "name": "model.norm.weight", + "shape": [ + 2048 + ], + "dtype": "float16", + "format": "f32-to-bf16", + "nbytes": 4096, + "byteOffset": 25182208 + } + ], + "md5sum": "c3efc4066aa0c0e56f27942514b9324e" + } + ] +} \ No newline at end of file diff --git a/params_shard_0.bin b/params_shard_0.bin new file mode 100644 index 0000000000000000000000000000000000000000..e53a2e63a9418f4c362ab6acdc1cde3e3d530964 --- /dev/null +++ b/params_shard_0.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4d4d761ff711e29e7d36cfaf9096ff83808c8efb8c8816af1c4b41a097996fa3 +size 201326592 diff --git a/params_shard_1.bin b/params_shard_1.bin new file mode 100644 index 0000000000000000000000000000000000000000..6b610dd10817f67c8e8f1d5dc35db1c1dd2d5fc5 --- /dev/null +++ b/params_shard_1.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d2476e844c2d45a05f3400220ea28ff69af7c86d0c127590c75f55ae22fed1af +size 33554432 diff --git a/params_shard_10.bin b/params_shard_10.bin new file mode 100644 index 0000000000000000000000000000000000000000..e7fbe6ebd9c4d16424e9afe1830946a3e5cd78f3 --- /dev/null +++ b/params_shard_10.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7f23a8e826ccce00d0fe5359a92084c0e9b61aeb55f39ab59f175e96bb13b107 +size 33554432 diff --git a/params_shard_11.bin b/params_shard_11.bin new file mode 100644 index 0000000000000000000000000000000000000000..a033b5feb4e01f89e6d5d3b13a7e2afc15b1e85f --- /dev/null +++ b/params_shard_11.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:bdf51a9f17af1ceb3b915c5ffb8af3d52dc8a7b074239d20858bfa144b895169 +size 67108864 diff --git a/params_shard_12.bin b/params_shard_12.bin new file mode 100644 index 0000000000000000000000000000000000000000..56256883bd9947d8115399cbc13e6d1bd602c183 --- /dev/null +++ b/params_shard_12.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b785af6fea93b5912229c1f30a2a8107125d19b06206fe05c45ef0b106865d1f +size 25165824 diff --git a/params_shard_13.bin b/params_shard_13.bin new file mode 100644 index 0000000000000000000000000000000000000000..597eb7858dd0fab505ee12e4d4588f9be906c0cd --- /dev/null +++ b/params_shard_13.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3522ac76d8d85fbc4a1bfd40ffcb9d0f3774d72e7815a3af8947933aad7cb88d +size 25190400 diff --git a/params_shard_14.bin b/params_shard_14.bin new file mode 100644 index 0000000000000000000000000000000000000000..4a9e123cd940f8a590562d39086df0c830a85147 --- /dev/null +++ b/params_shard_14.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:634e16a9b692f369f56432fd556a54496aa0c97de0110339c64c6dc811575392 +size 33554432 diff --git a/params_shard_15.bin b/params_shard_15.bin new file mode 100644 index 0000000000000000000000000000000000000000..bd50a1146d62e3da995e1c43bf76a85fdf3bbd2f --- /dev/null +++ b/params_shard_15.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4fbfd235a3bee1be6ef96867ed388ff5f2d9eb6723baf43018b6a81afada9bec +size 67108864 diff --git a/params_shard_16.bin b/params_shard_16.bin new file mode 100644 index 0000000000000000000000000000000000000000..67dfe5fb1265f0c73c1f167e989a93933bf4514a --- /dev/null +++ b/params_shard_16.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d1a16f49c678f69260f1d1074f6bed3e9ad9f18799fcfc9401113f61073b7ff2 +size 25165824 diff --git a/params_shard_17.bin b/params_shard_17.bin new file mode 100644 index 0000000000000000000000000000000000000000..342bf09e8d9f592b8a4ec1dc1b107c49c03ffc80 --- /dev/null +++ b/params_shard_17.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:204f8059337078deec73020cf675bdccbfdf24578a8b7242312744ffc76bc8a8 +size 33554432 diff --git a/params_shard_18.bin b/params_shard_18.bin new file mode 100644 index 0000000000000000000000000000000000000000..a899d7799b8c9d9aa7e05e711b1e393df1ba7964 --- /dev/null +++ b/params_shard_18.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:890c500368d65c14709233527d57fc78bf54f409b5ef1515280e4d675619d4b8 +size 67108864 diff --git a/params_shard_19.bin b/params_shard_19.bin new file mode 100644 index 0000000000000000000000000000000000000000..8e3ec0177cb8d9d527378bdf0d658f30a8692575 --- /dev/null +++ b/params_shard_19.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9d415db41fb22dac28c69820296dc148b62385898ae3ad5fc46da1ca0961acc9 +size 25165824 diff --git a/params_shard_2.bin b/params_shard_2.bin new file mode 100644 index 0000000000000000000000000000000000000000..d414d72e194c951cfc2d1cdbf36cfac7b991e3b0 --- /dev/null +++ b/params_shard_2.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2ea74afe886a0f234b1c765c055f52b248265b26e7c9362e65ba49159ee891f4 +size 67108864 diff --git a/params_shard_20.bin b/params_shard_20.bin new file mode 100644 index 0000000000000000000000000000000000000000..53fb08f56a0109494074c74396f56d547a33f81b --- /dev/null +++ b/params_shard_20.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b7b1579aba11afbc4dcae32f1798f75b9d21a7a1fdc5bd82bb970b4e9e67f8f2 +size 33554432 diff --git a/params_shard_21.bin b/params_shard_21.bin new file mode 100644 index 0000000000000000000000000000000000000000..9e8f901a5458625c0ad3d5c1cad58c3611eef056 --- /dev/null +++ b/params_shard_21.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ac6a81acbc034f9579c619d10f3dfe5c7fe8732bcbeb00b0a0822db692923d +size 67108864 diff --git a/params_shard_22.bin b/params_shard_22.bin new file mode 100644 index 0000000000000000000000000000000000000000..94cf80153fb54baa047bc2403cec203e941ae83f --- /dev/null +++ b/params_shard_22.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f06f20326d24aa7be17f330c2c1b2993c75644051d9bd093228294c0cd2c47fc +size 25165824 diff --git a/params_shard_23.bin b/params_shard_23.bin new file mode 100644 index 0000000000000000000000000000000000000000..0d67e81768007ab14dca5c63e931b65079448793 --- /dev/null +++ b/params_shard_23.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5a05ebf87a548c4d8ce553c978dbe105f237c8c4201e0eec8e3ff6e229524f82 +size 25190400 diff --git a/params_shard_24.bin b/params_shard_24.bin new file mode 100644 index 0000000000000000000000000000000000000000..b30bfabdd6231bf35fac36454ee273888ed6d485 --- /dev/null +++ b/params_shard_24.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:98a97d60299c4c04d4398fc94dddf2cb71b12d98b01bb27e56f769a8473ec8df +size 33554432 diff --git a/params_shard_25.bin b/params_shard_25.bin new file mode 100644 index 0000000000000000000000000000000000000000..8301e206017cd4b7f35fbb106bbc0521dd6f71d8 --- /dev/null +++ b/params_shard_25.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:10a885c6f46b3a608a41f011a1822bac89977155edfbc89eca7eb59790c2dced +size 67108864 diff --git a/params_shard_26.bin b/params_shard_26.bin new file mode 100644 index 0000000000000000000000000000000000000000..0227f86f2096416bf052a6871a62d58b83c8258e --- /dev/null +++ b/params_shard_26.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:312ae5e1b1c13e6e7375bb5dfa66f57bfe69a7b9a727b94d886714fb1865ca2f +size 25165824 diff --git a/params_shard_27.bin b/params_shard_27.bin new file mode 100644 index 0000000000000000000000000000000000000000..3962efbb802e0cc653ba15a4ea232d27d7884e7f --- /dev/null +++ b/params_shard_27.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e751ad2fdcded1623b94fc5bd1ccd004afb3a051da246137f3ff707a780eeacb +size 33554432 diff --git a/params_shard_28.bin b/params_shard_28.bin new file mode 100644 index 0000000000000000000000000000000000000000..992c9efd643fd52e53b29e0df1de8236d0b54d2c --- /dev/null +++ b/params_shard_28.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:96723727c95302c51ef9380f5ba9e58da882bdc330a2c9c22dbc161435a697cd +size 67108864 diff --git a/params_shard_29.bin b/params_shard_29.bin new file mode 100644 index 0000000000000000000000000000000000000000..562492250fc749b43da52b4ead1c852d64d78892 --- /dev/null +++ b/params_shard_29.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:22b1bcb3e4767413aeac20feb6fa130ed40132ca8e4c124bca06dfbfaa4112c4 +size 25165824 diff --git a/params_shard_3.bin b/params_shard_3.bin new file mode 100644 index 0000000000000000000000000000000000000000..1e8948c20dc9d91e1761ef5c02057e98ea73fdb8 --- /dev/null +++ b/params_shard_3.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c9bd09534037494ba1a5a989bc773df1c7fbadd1588982f82c7c57c0c4921175 +size 25174016 diff --git a/params_shard_30.bin b/params_shard_30.bin new file mode 100644 index 0000000000000000000000000000000000000000..d577f5b95e9f986f1ab044d4d9c6b76ef538a531 --- /dev/null +++ b/params_shard_30.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f5950ad074a47190becd7fb0d5c0d5d07e3261767f1236d2fe5df77ebb41153c +size 33554432 diff --git a/params_shard_31.bin b/params_shard_31.bin new file mode 100644 index 0000000000000000000000000000000000000000..ad10c0540d96078156756a656f012e3b3c168f8c --- /dev/null +++ b/params_shard_31.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:54c8f69d949bd0b3eeb84c77acc37f43e618b69d9abacaae95e8210fa63af014 +size 67108864 diff --git a/params_shard_32.bin b/params_shard_32.bin new file mode 100644 index 0000000000000000000000000000000000000000..733de2982583e67b1ea67d9f3dfa1fc6d556bc12 --- /dev/null +++ b/params_shard_32.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:337e42661e5d765d714a42d7bbbcf66ee70a9553233b00e2cb91fcbe7abbb1bf +size 25165824 diff --git a/params_shard_33.bin b/params_shard_33.bin new file mode 100644 index 0000000000000000000000000000000000000000..e9d2485924a74ee866a916025082060f3218fb10 --- /dev/null +++ b/params_shard_33.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a108a187bd243294800c31e24a553662830cd482820f60123027cec081870cae +size 25190400 diff --git a/params_shard_34.bin b/params_shard_34.bin new file mode 100644 index 0000000000000000000000000000000000000000..7bba59bdb0597712f512de93b66d14208dd84ba2 --- /dev/null +++ b/params_shard_34.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:ffaf10fbe9011bb800ed8dd2c11c2f1990bbf1a9dfbaf5db7de3fcbc0ff4c7e2 +size 33554432 diff --git a/params_shard_35.bin b/params_shard_35.bin new file mode 100644 index 0000000000000000000000000000000000000000..fbfadae4b18f3ed5e824f7f0078a358cc4aa3258 --- /dev/null +++ b/params_shard_35.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1d4131287d72d512035978140580ecc7b1af9ab0b9927d467f403c41d843d002 +size 67108864 diff --git a/params_shard_36.bin b/params_shard_36.bin new file mode 100644 index 0000000000000000000000000000000000000000..7f6d1b62be7f3d0f6421799a7176d44e0e5eb258 --- /dev/null +++ b/params_shard_36.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:a63eb39c230723dec9adbc2484ba0fa6c795c20db6b8e393f0d8fee37fff681d +size 25165824 diff --git a/params_shard_37.bin b/params_shard_37.bin new file mode 100644 index 0000000000000000000000000000000000000000..0841a9b4272f3639cf28ca7bf7ddf258ee0e11a1 --- /dev/null +++ b/params_shard_37.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6d870c9812d3191a8a327e0290d0f0dd45eca0443d51036a8ce99a0f9778f616 +size 33554432 diff --git a/params_shard_38.bin b/params_shard_38.bin new file mode 100644 index 0000000000000000000000000000000000000000..33762dfe9a74a3a40e849b4717f01d4b41a69a82 --- /dev/null +++ b/params_shard_38.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f54a7d5bb59f3558a823c3f9cd5b69836eacdd52079dafc0e8bdfce3df1914e5 +size 67108864 diff --git a/params_shard_39.bin b/params_shard_39.bin new file mode 100644 index 0000000000000000000000000000000000000000..cdda235eeb3661b99efb0e23162bf1f2f3b58200 --- /dev/null +++ b/params_shard_39.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:88f9ca4723125c25246cd0655a3d81f0304ef26122557da2314902dda1bf523d +size 25165824 diff --git a/params_shard_4.bin b/params_shard_4.bin new file mode 100644 index 0000000000000000000000000000000000000000..7afd152e2dc8d6c910293121c57f3da175de913e --- /dev/null +++ b/params_shard_4.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:064a6fcd10e23584a1f2ca7894c888a17814cd7b48b181f872d1822a3e203d20 +size 33554432 diff --git a/params_shard_40.bin b/params_shard_40.bin new file mode 100644 index 0000000000000000000000000000000000000000..f27e51c571ed5c7bc4e975581ba51408d16fefd8 --- /dev/null +++ b/params_shard_40.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:626fa4d32e40b10b0229e41907c179eff368e55ddfa6b3fa3b10657854ed0549 +size 33554432 diff --git a/params_shard_41.bin b/params_shard_41.bin new file mode 100644 index 0000000000000000000000000000000000000000..47797b9f18da80cceef70206a4d1356f6b0e38f0 --- /dev/null +++ b/params_shard_41.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9440acf00de713546e3e1f757f8378019ab04e5f7fc7e60fa2c30c6fd699c305 +size 67108864 diff --git a/params_shard_42.bin b/params_shard_42.bin new file mode 100644 index 0000000000000000000000000000000000000000..bde49ddaa03c2e9aa02efff82d970c0beb545c7b --- /dev/null +++ b/params_shard_42.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9ec645b6f22273af11c8adcb1343d167b9c50c875c5d9cca79e3117ab1bacf4c +size 25165824 diff --git a/params_shard_43.bin b/params_shard_43.bin new file mode 100644 index 0000000000000000000000000000000000000000..22ed7001fad0535cf8048abdd98f4927f60ef190 --- /dev/null +++ b/params_shard_43.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7c6df99c742348475600cf76e113de2e36f54cbaf39fdf783256671ef1cf8f9e +size 25190400 diff --git a/params_shard_44.bin b/params_shard_44.bin new file mode 100644 index 0000000000000000000000000000000000000000..f07204b970c04d296a37be54cc3fe8538c7cce8b --- /dev/null +++ b/params_shard_44.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:b4253f7122a314a4e838f852aff7e646b66ee25f038ff384a8a0234f6c7516db +size 33554432 diff --git a/params_shard_45.bin b/params_shard_45.bin new file mode 100644 index 0000000000000000000000000000000000000000..6137b5e20e7cab8ab863d2ab12eb00d012152d66 --- /dev/null +++ b/params_shard_45.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:178772f55c575286086db6d4bc36ae50f8c60bacbc483535738caaaa539bece8 +size 67108864 diff --git a/params_shard_46.bin b/params_shard_46.bin new file mode 100644 index 0000000000000000000000000000000000000000..9677df859794170babb044540bf1831d226ee43f --- /dev/null +++ b/params_shard_46.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d087dc5ad11b66c794ba48c144f9d97a9a2e54a5f710722b5d5dfb598343ea27 +size 25165824 diff --git a/params_shard_47.bin b/params_shard_47.bin new file mode 100644 index 0000000000000000000000000000000000000000..2b82532144f6375a3a56b8e8673cd205a28bc803 --- /dev/null +++ b/params_shard_47.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:aad64641523bdd600913cf393a002f3d0c87e908ffb428f8cf64ee2775e2031e +size 33554432 diff --git a/params_shard_48.bin b/params_shard_48.bin new file mode 100644 index 0000000000000000000000000000000000000000..f683a030f45ec6f23d4d5baf5d126a688792b66d --- /dev/null +++ b/params_shard_48.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fab27feb0d503eb0b246366d48441969e91a6a191d5a9702b581c518d37f6333 +size 67108864 diff --git a/params_shard_49.bin b/params_shard_49.bin new file mode 100644 index 0000000000000000000000000000000000000000..e2acfb2893e309c975d91c1080ffc49b626a87f4 --- /dev/null +++ b/params_shard_49.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d780b74b9dca4ffa7af102f0300e65350387e3cd94af94e208c2c40caf2faf3c +size 25165824 diff --git a/params_shard_5.bin b/params_shard_5.bin new file mode 100644 index 0000000000000000000000000000000000000000..b047c54a705139bc6feb3652a0380f79283db1d1 --- /dev/null +++ b/params_shard_5.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:3b4ecd73649495dbdadcfadb9a18d0948dfe77ce43dd91398f545fb3b31201aa +size 67108864 diff --git a/params_shard_50.bin b/params_shard_50.bin new file mode 100644 index 0000000000000000000000000000000000000000..6486e24f9d5e0cef61ed34a0f5b3c3f4d938b2eb --- /dev/null +++ b/params_shard_50.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:faba238e8bcc3fc73dee72114147001eded8e714f16194e1f4d11e147d1876e4 +size 33554432 diff --git a/params_shard_51.bin b/params_shard_51.bin new file mode 100644 index 0000000000000000000000000000000000000000..d546adcf4e72c9130a38605f2147ac2e7a95d21e --- /dev/null +++ b/params_shard_51.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:2d7b20bd4972da0fc5386c0e3df8504cbe1c29e534caeb3447aeb73fd3d8ceb0 +size 67108864 diff --git a/params_shard_52.bin b/params_shard_52.bin new file mode 100644 index 0000000000000000000000000000000000000000..249e3a032be964b7a5879058c8c6fe9b0d781913 --- /dev/null +++ b/params_shard_52.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:0d734b4cb91744854e806294fac1d6c06cd3fdeff4227a12351d69af22f47788 +size 25165824 diff --git a/params_shard_53.bin b/params_shard_53.bin new file mode 100644 index 0000000000000000000000000000000000000000..a8da226921b0692db3254a5e25f17c63395bc255 --- /dev/null +++ b/params_shard_53.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:558bfc9b75875abc1ac209d1f2c3e2fbb1c137a11c731e2507dca64b7e151fdb +size 25190400 diff --git a/params_shard_54.bin b/params_shard_54.bin new file mode 100644 index 0000000000000000000000000000000000000000..bcd24245ed0adfa8b0d1348165d9ed9b371762b9 --- /dev/null +++ b/params_shard_54.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:82dc34e025d0925d8b7ea3ce82cdd25cd867df8aac694f997df768d97a7f461c +size 33554432 diff --git a/params_shard_55.bin b/params_shard_55.bin new file mode 100644 index 0000000000000000000000000000000000000000..1eaf8a3e56214ac778346246a95d0bdaba74804b --- /dev/null +++ b/params_shard_55.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:c3c493faf60dcd150c239a323ca308a4bece4aa02613542f26e1d60fae0416a3 +size 67108864 diff --git a/params_shard_56.bin b/params_shard_56.bin new file mode 100644 index 0000000000000000000000000000000000000000..ff84f5a223dbba6021d68cbeb8a1b88fc4c062cb --- /dev/null +++ b/params_shard_56.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:877d9e2d476237e2781b91610c1edd89e5599ef4561d44a40b8a6b7070ec99d8 +size 25165824 diff --git a/params_shard_57.bin b/params_shard_57.bin new file mode 100644 index 0000000000000000000000000000000000000000..ce0d6b37b75484e0bee54ce6331fac41907b6045 --- /dev/null +++ b/params_shard_57.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f35f24f907799541e1ed3fbb4b27082410e7ae641bf39f52217a82eec37514e3 +size 33554432 diff --git a/params_shard_58.bin b/params_shard_58.bin new file mode 100644 index 0000000000000000000000000000000000000000..b75343a2816e770e250bef2d4ced057a450c9911 --- /dev/null +++ b/params_shard_58.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:9cd1a044b03f1f5e0dbd25b0c419a3c1f976aa2b655ba3dcf62494810b5ae2f8 +size 67108864 diff --git a/params_shard_59.bin b/params_shard_59.bin new file mode 100644 index 0000000000000000000000000000000000000000..a33a372b12bc7da9f6707eb6a2171669dc1ff0c1 --- /dev/null +++ b/params_shard_59.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f50acf6a684115e19d1227c1cf67d8ce77ddccea81072382b08851e35a8ef4f9 +size 25165824 diff --git a/params_shard_6.bin b/params_shard_6.bin new file mode 100644 index 0000000000000000000000000000000000000000..008406aa14248e755eba77391e3c37f4383bc5e0 --- /dev/null +++ b/params_shard_6.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:056afea724dc5f4d18a6d05ff7ed37ec2e7b9a07e63e989fe8bef8f2b2e08946 +size 25165824 diff --git a/params_shard_60.bin b/params_shard_60.bin new file mode 100644 index 0000000000000000000000000000000000000000..edcccea9c7563f161a9150c5c923c1705242e4ab --- /dev/null +++ b/params_shard_60.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:83956c0d19fe0309b96fc433714bcf4caf7d2017eb7e2968a209681882140340 +size 33554432 diff --git a/params_shard_61.bin b/params_shard_61.bin new file mode 100644 index 0000000000000000000000000000000000000000..e4e5842f72d656a6a2f8371d8d5d12bca98a23d3 --- /dev/null +++ b/params_shard_61.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:832f9e936da0318f3960207a21e272e10c717c1f5d827399ad29a3339e477583 +size 67108864 diff --git a/params_shard_62.bin b/params_shard_62.bin new file mode 100644 index 0000000000000000000000000000000000000000..53bd5d176c101df9b2525a0c0dbdf26d73d8855b --- /dev/null +++ b/params_shard_62.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:13b677992cbaa5e51288bdff6b95589ccb14144d111de71a3a9f07c595bf984f +size 25165824 diff --git a/params_shard_63.bin b/params_shard_63.bin new file mode 100644 index 0000000000000000000000000000000000000000..62f6a56e62fae18a70825012c7cc3330aa045f7d --- /dev/null +++ b/params_shard_63.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:8a958022919a8330c64c10e90d1a0c336ff3a6a6f4b297f8159cc574ea03bb80 +size 25190400 diff --git a/params_shard_64.bin b/params_shard_64.bin new file mode 100644 index 0000000000000000000000000000000000000000..72fdab488b70a7bbc4e617c01cd1bcb56bc993a8 --- /dev/null +++ b/params_shard_64.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:e46cf5d0fe9043bc8e7599c0aca79e71c962a13ba7aa7fbdd55cea9de176547f +size 33554432 diff --git a/params_shard_65.bin b/params_shard_65.bin new file mode 100644 index 0000000000000000000000000000000000000000..90e71140c5ceb300a1faf60570d6861e1c496b54 --- /dev/null +++ b/params_shard_65.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:fd494960aeb98ea4788477f7be92edb89b73992e863eea6c9fdd445cb8ace4d3 +size 67108864 diff --git a/params_shard_66.bin b/params_shard_66.bin new file mode 100644 index 0000000000000000000000000000000000000000..6fd8ad02b1b666cc931f1a7a004be1bc755e031f --- /dev/null +++ b/params_shard_66.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:018d7d7bd9f77cb1c1234c17009e2ba879865e7fe714467c6e4d36fa93bf07ce +size 25165824 diff --git a/params_shard_67.bin b/params_shard_67.bin new file mode 100644 index 0000000000000000000000000000000000000000..2d6547151ca3578870593181338efe5a0c727307 --- /dev/null +++ b/params_shard_67.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:7ad2f21a345f18cdccb589bd1ad82ad775e8beffc416e98532ee4c6d7c385ee3 +size 33554432 diff --git a/params_shard_68.bin b/params_shard_68.bin new file mode 100644 index 0000000000000000000000000000000000000000..7872ec2acd4cb3fa56348ab0ff54b4c49309969a --- /dev/null +++ b/params_shard_68.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:532dd02fca48bebd20c8998028dec9eb4f7e2b2b83d6d20a7c1191fbd45c822d +size 67108864 diff --git a/params_shard_69.bin b/params_shard_69.bin new file mode 100644 index 0000000000000000000000000000000000000000..3e553b7587d14b1ff6348feef63d1c4eb6c00731 --- /dev/null +++ b/params_shard_69.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:6cc8235906758efa9f9fae4a92c2c6a2fba5b95c2a5012ae5b02795f5f57541f +size 25165824 diff --git a/params_shard_7.bin b/params_shard_7.bin new file mode 100644 index 0000000000000000000000000000000000000000..a7b543253feea84ecb07507998837a9e878ebaec --- /dev/null +++ b/params_shard_7.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f188e4aad386e36dfdb88531ae44d00f7f5ce30ec42523baba314d164d2533a5 +size 33554432 diff --git a/params_shard_70.bin b/params_shard_70.bin new file mode 100644 index 0000000000000000000000000000000000000000..046f260232756a9bc0060d586e4fa7334ab8614b --- /dev/null +++ b/params_shard_70.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:471812dfb90625b18867d5c39fcf92147a26af1a9e67c3373cba4ace592634c9 +size 33554432 diff --git a/params_shard_71.bin b/params_shard_71.bin new file mode 100644 index 0000000000000000000000000000000000000000..a1b62906c61fcea3c01e987f63bf4340050fe1e7 --- /dev/null +++ b/params_shard_71.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:f62f45115d1a19ee17b0b39d20613f3c13b4f55e60e99b0f7becdfca5af008c9 +size 67108864 diff --git a/params_shard_72.bin b/params_shard_72.bin new file mode 100644 index 0000000000000000000000000000000000000000..2e1ff044ada4615345b904318faa5c7fece9af59 --- /dev/null +++ b/params_shard_72.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:5c855ff8f9e8519eea881e3690d4027da77d30340307d6546efc6a3ef9433d74 +size 25165824 diff --git a/params_shard_73.bin b/params_shard_73.bin new file mode 100644 index 0000000000000000000000000000000000000000..8626fa5320de27786edaadbb8a627a634135427f --- /dev/null +++ b/params_shard_73.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cdafa5a3632b3d98c10d034eba725ed751e41ce1fac76f95bf889b55a39f5ce2 +size 25190400 diff --git a/params_shard_74.bin b/params_shard_74.bin new file mode 100644 index 0000000000000000000000000000000000000000..549672e120ee4055f49608674e341499c8f41469 --- /dev/null +++ b/params_shard_74.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:924b4845739b587e1f42c590b1dbc7602c7546109309a65b904fe620f11c28be +size 33554432 diff --git a/params_shard_75.bin b/params_shard_75.bin new file mode 100644 index 0000000000000000000000000000000000000000..a402589bff0fdb3abf844c9cc58e7b53fc832121 --- /dev/null +++ b/params_shard_75.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:cd61444cb8deb810cedb6ff051a3ce2bc7f8da0f8f464511c161123c14ac16db +size 67108864 diff --git a/params_shard_76.bin b/params_shard_76.bin new file mode 100644 index 0000000000000000000000000000000000000000..6ea7201575a444a12367f347801994908fd0b974 --- /dev/null +++ b/params_shard_76.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d7b294d65bf7082bcd69520724e47926778e3e4242f69253c7161ee0297bfc71 +size 25165824 diff --git a/params_shard_77.bin b/params_shard_77.bin new file mode 100644 index 0000000000000000000000000000000000000000..b401c8d3742b44508b3e1ca0f1c1df485ab31d8f --- /dev/null +++ b/params_shard_77.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:48ac39db0ae4a2e34f7baae3083b70db107f4e9bdd4ff06322c4afc9030c9530 +size 33554432 diff --git a/params_shard_78.bin b/params_shard_78.bin new file mode 100644 index 0000000000000000000000000000000000000000..be313f94916514c6d86cfceb9ab5dc7acf9ed7ef --- /dev/null +++ b/params_shard_78.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:1dd62483ff99c5d3736b613b6454f21d95410ad3bd27cec830a9fcacde85355e +size 67108864 diff --git a/params_shard_79.bin b/params_shard_79.bin new file mode 100644 index 0000000000000000000000000000000000000000..1171b0f2d12b2e37c5dafba82a014a4dccb643e9 --- /dev/null +++ b/params_shard_79.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:d382be5aee9dc09a7da76a0807636230b277bb807eb6e7c6cff42cd962193607 +size 25165824 diff --git a/params_shard_8.bin b/params_shard_8.bin new file mode 100644 index 0000000000000000000000000000000000000000..dcb71eed2b090d665af4464ca711fb4cd4987fc2 --- /dev/null +++ b/params_shard_8.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:4b7e063fe0d7161d0d7f396f03a4f6cca10778e75bab2bca271275a691a7b638 +size 67108864 diff --git a/params_shard_80.bin b/params_shard_80.bin new file mode 100644 index 0000000000000000000000000000000000000000..2d01c2fe88b4a31e25318bf4c93ae7c9cf2b6d55 --- /dev/null +++ b/params_shard_80.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:824fb2ba8f2bae63fb39e14e23f7d64ab13296890fd4c5eb5db5964088bc33e8 +size 25186304 diff --git a/params_shard_9.bin b/params_shard_9.bin new file mode 100644 index 0000000000000000000000000000000000000000..6bfde2e94e5d5db9140d1329a306b1c2152259c5 --- /dev/null +++ b/params_shard_9.bin @@ -0,0 +1,3 @@ +version https://git-lfs.github.com/spec/v1 +oid sha256:844724cb1f453b2a4a1bf2f5233f4c5fca02e06a80de41818619975f085c2ac4 +size 25165824