kewang2 haoyang-amd committed on
Commit
491287d
·
verified ·
1 Parent(s): c0afb44

update files (#6)

Browse files

- update model (fbd18769b5a306ebf5912f56c8ca7c4a815ab2cf)


Co-authored-by: haoyanli <[email protected]>

config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/model_path/Llama-3.2-11B-Vision-Instruct",
3
  "architectures": [
4
  "MllamaForConditionalGeneration"
5
  ],
@@ -9,7 +9,6 @@
9
  "activation_scheme": "static",
10
  "ignored_layers": [
11
  "*lm_head",
12
- "*cross_attn*",
13
  "vision_model*",
14
  "multi_modal_projector"
15
  ],
 
1
  {
2
+ "_name_or_path": "/model_path/meta-llama/Llama-3.2-11B-Vision-Instruct",
3
  "architectures": [
4
  "MllamaForConditionalGeneration"
5
  ],
 
9
  "activation_scheme": "static",
10
  "ignored_layers": [
11
  "*lm_head",
 
12
  "vision_model*",
13
  "multi_modal_projector"
14
  ],
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3a3123a5a86e315ff58920026ef7a912fb13788747d38acbbbec47012e89052b
3
- size 4984320774
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f19ac12e7ce70ecaf2a71ddfc1d19e1f66afd657dea59d8dc8d2ae06731ca304
3
+ size 4984340938
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da57cc3b84a544a430a742177e2ba98d5870cba85bf0cb7f0c3974f31e73d715
3
- size 4966480928
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62dfc57434ed8285cba3038890d783bbe662dea68b7c4f9e85e36fd68b0b91bd
3
+ size 4991668796
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8a6c5ade219cab320a5fef98063d814d9c5005fe4c015f9d75d0c9a6d91bd6b5
3
- size 3001212912
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:268aadb2d226b852c4419ba35266a45129e229c1f6c9deb9214d468acc62f9c1
3
+ size 2640468128
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 12951835782
4
  },
5
  "weight_map": {
6
  "language_model.lm_head.weight": "model-00003-of-00003.safetensors",
@@ -64,19 +64,19 @@
64
  "language_model.model.layers.10.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
65
  "language_model.model.layers.10.mlp.up_proj.weight_scale": "model-00002-of-00003.safetensors",
66
  "language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
- "language_model.model.layers.10.self_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
68
- "language_model.model.layers.10.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
69
- "language_model.model.layers.10.self_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
70
- "language_model.model.layers.10.self_attn.kv_scale": "model-00002-of-00003.safetensors",
71
  "language_model.model.layers.10.self_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
72
  "language_model.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
73
  "language_model.model.layers.10.self_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
74
- "language_model.model.layers.10.self_attn.q_proj.input_scale": "model-00002-of-00003.safetensors",
75
- "language_model.model.layers.10.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
76
- "language_model.model.layers.10.self_attn.q_proj.weight_scale": "model-00002-of-00003.safetensors",
77
- "language_model.model.layers.10.self_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
78
- "language_model.model.layers.10.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
79
- "language_model.model.layers.10.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
80
  "language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
  "language_model.model.layers.11.mlp.down_proj.input_scale": "model-00002-of-00003.safetensors",
82
  "language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
@@ -126,11 +126,19 @@
126
  "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
127
  "language_model.model.layers.12.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
128
  "language_model.model.layers.13.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
 
129
  "language_model.model.layers.13.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
 
 
130
  "language_model.model.layers.13.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
 
131
  "language_model.model.layers.13.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
 
132
  "language_model.model.layers.13.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
 
 
133
  "language_model.model.layers.13.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
 
134
  "language_model.model.layers.13.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
135
  "language_model.model.layers.13.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
136
  "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
@@ -241,11 +249,19 @@
241
  "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
242
  "language_model.model.layers.17.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
243
  "language_model.model.layers.18.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
 
244
  "language_model.model.layers.18.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
 
 
245
  "language_model.model.layers.18.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
 
246
  "language_model.model.layers.18.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
 
247
  "language_model.model.layers.18.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
 
 
248
  "language_model.model.layers.18.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
 
249
  "language_model.model.layers.18.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
250
  "language_model.model.layers.18.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
251
  "language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
@@ -380,11 +396,19 @@
380
  "language_model.model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
381
  "language_model.model.layers.22.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
382
  "language_model.model.layers.23.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
 
383
  "language_model.model.layers.23.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
 
 
384
  "language_model.model.layers.23.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
 
385
  "language_model.model.layers.23.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
 
386
  "language_model.model.layers.23.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
 
 
387
  "language_model.model.layers.23.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
 
388
  "language_model.model.layers.23.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
389
  "language_model.model.layers.23.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
390
  "language_model.model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors",
@@ -495,11 +519,19 @@
495
  "language_model.model.layers.27.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
496
  "language_model.model.layers.27.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
497
  "language_model.model.layers.28.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
 
498
  "language_model.model.layers.28.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
 
 
499
  "language_model.model.layers.28.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
 
500
  "language_model.model.layers.28.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
 
501
  "language_model.model.layers.28.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
 
 
502
  "language_model.model.layers.28.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
 
503
  "language_model.model.layers.28.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
504
  "language_model.model.layers.28.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
505
  "language_model.model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors",
@@ -538,11 +570,19 @@
538
  "language_model.model.layers.29.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
539
  "language_model.model.layers.29.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
540
  "language_model.model.layers.3.cross_attn.k_norm.weight": "model-00001-of-00003.safetensors",
 
541
  "language_model.model.layers.3.cross_attn.k_proj.weight": "model-00001-of-00003.safetensors",
 
 
542
  "language_model.model.layers.3.cross_attn.o_proj.weight": "model-00001-of-00003.safetensors",
 
543
  "language_model.model.layers.3.cross_attn.q_norm.weight": "model-00001-of-00003.safetensors",
 
544
  "language_model.model.layers.3.cross_attn.q_proj.weight": "model-00001-of-00003.safetensors",
 
 
545
  "language_model.model.layers.3.cross_attn.v_proj.weight": "model-00001-of-00003.safetensors",
 
546
  "language_model.model.layers.3.cross_attn_attn_gate": "model-00001-of-00003.safetensors",
547
  "language_model.model.layers.3.cross_attn_mlp_gate": "model-00001-of-00003.safetensors",
548
  "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
@@ -580,17 +620,17 @@
580
  "language_model.model.layers.30.self_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
581
  "language_model.model.layers.30.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
582
  "language_model.model.layers.30.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
583
- "language_model.model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
584
- "language_model.model.layers.31.mlp.down_proj.input_scale": "model-00003-of-00003.safetensors",
585
- "language_model.model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
586
- "language_model.model.layers.31.mlp.down_proj.weight_scale": "model-00003-of-00003.safetensors",
587
  "language_model.model.layers.31.mlp.gate_proj.input_scale": "model-00002-of-00003.safetensors",
588
  "language_model.model.layers.31.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
589
  "language_model.model.layers.31.mlp.gate_proj.weight_scale": "model-00002-of-00003.safetensors",
590
  "language_model.model.layers.31.mlp.up_proj.input_scale": "model-00002-of-00003.safetensors",
591
  "language_model.model.layers.31.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
592
  "language_model.model.layers.31.mlp.up_proj.weight_scale": "model-00002-of-00003.safetensors",
593
- "language_model.model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
594
  "language_model.model.layers.31.self_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
595
  "language_model.model.layers.31.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
596
  "language_model.model.layers.31.self_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
@@ -604,38 +644,46 @@
604
  "language_model.model.layers.31.self_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
605
  "language_model.model.layers.31.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
606
  "language_model.model.layers.31.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
607
- "language_model.model.layers.32.input_layernorm.weight": "model-00003-of-00003.safetensors",
608
- "language_model.model.layers.32.mlp.down_proj.input_scale": "model-00003-of-00003.safetensors",
609
- "language_model.model.layers.32.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
610
- "language_model.model.layers.32.mlp.down_proj.weight_scale": "model-00003-of-00003.safetensors",
611
- "language_model.model.layers.32.mlp.gate_proj.input_scale": "model-00003-of-00003.safetensors",
612
- "language_model.model.layers.32.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
613
- "language_model.model.layers.32.mlp.gate_proj.weight_scale": "model-00003-of-00003.safetensors",
614
- "language_model.model.layers.32.mlp.up_proj.input_scale": "model-00003-of-00003.safetensors",
615
- "language_model.model.layers.32.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
616
- "language_model.model.layers.32.mlp.up_proj.weight_scale": "model-00003-of-00003.safetensors",
617
- "language_model.model.layers.32.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
618
- "language_model.model.layers.32.self_attn.k_proj.input_scale": "model-00003-of-00003.safetensors",
619
- "language_model.model.layers.32.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
620
- "language_model.model.layers.32.self_attn.k_proj.weight_scale": "model-00003-of-00003.safetensors",
621
- "language_model.model.layers.32.self_attn.kv_scale": "model-00003-of-00003.safetensors",
622
- "language_model.model.layers.32.self_attn.o_proj.input_scale": "model-00003-of-00003.safetensors",
623
- "language_model.model.layers.32.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
624
- "language_model.model.layers.32.self_attn.o_proj.weight_scale": "model-00003-of-00003.safetensors",
625
- "language_model.model.layers.32.self_attn.q_proj.input_scale": "model-00003-of-00003.safetensors",
626
- "language_model.model.layers.32.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
627
- "language_model.model.layers.32.self_attn.q_proj.weight_scale": "model-00003-of-00003.safetensors",
628
- "language_model.model.layers.32.self_attn.v_proj.input_scale": "model-00003-of-00003.safetensors",
629
- "language_model.model.layers.32.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
630
- "language_model.model.layers.32.self_attn.v_proj.weight_scale": "model-00003-of-00003.safetensors",
631
  "language_model.model.layers.33.cross_attn.k_norm.weight": "model-00003-of-00003.safetensors",
 
632
  "language_model.model.layers.33.cross_attn.k_proj.weight": "model-00003-of-00003.safetensors",
 
 
633
  "language_model.model.layers.33.cross_attn.o_proj.weight": "model-00003-of-00003.safetensors",
 
634
  "language_model.model.layers.33.cross_attn.q_norm.weight": "model-00003-of-00003.safetensors",
 
635
  "language_model.model.layers.33.cross_attn.q_proj.weight": "model-00003-of-00003.safetensors",
 
 
636
  "language_model.model.layers.33.cross_attn.v_proj.weight": "model-00003-of-00003.safetensors",
637
- "language_model.model.layers.33.cross_attn_attn_gate": "model-00003-of-00003.safetensors",
638
- "language_model.model.layers.33.cross_attn_mlp_gate": "model-00003-of-00003.safetensors",
 
639
  "language_model.model.layers.33.input_layernorm.weight": "model-00003-of-00003.safetensors",
640
  "language_model.model.layers.33.mlp.down_proj.input_scale": "model-00003-of-00003.safetensors",
641
  "language_model.model.layers.33.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
@@ -744,11 +792,19 @@
744
  "language_model.model.layers.37.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
745
  "language_model.model.layers.37.self_attn.v_proj.weight_scale": "model-00003-of-00003.safetensors",
746
  "language_model.model.layers.38.cross_attn.k_norm.weight": "model-00003-of-00003.safetensors",
 
747
  "language_model.model.layers.38.cross_attn.k_proj.weight": "model-00003-of-00003.safetensors",
 
 
748
  "language_model.model.layers.38.cross_attn.o_proj.weight": "model-00003-of-00003.safetensors",
 
749
  "language_model.model.layers.38.cross_attn.q_norm.weight": "model-00003-of-00003.safetensors",
 
750
  "language_model.model.layers.38.cross_attn.q_proj.weight": "model-00003-of-00003.safetensors",
 
 
751
  "language_model.model.layers.38.cross_attn.v_proj.weight": "model-00003-of-00003.safetensors",
 
752
  "language_model.model.layers.38.cross_attn_attn_gate": "model-00003-of-00003.safetensors",
753
  "language_model.model.layers.38.cross_attn_mlp_gate": "model-00003-of-00003.safetensors",
754
  "language_model.model.layers.38.input_layernorm.weight": "model-00003-of-00003.safetensors",
@@ -883,11 +939,19 @@
883
  "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
884
  "language_model.model.layers.7.self_attn.v_proj.weight_scale": "model-00001-of-00003.safetensors",
885
  "language_model.model.layers.8.cross_attn.k_norm.weight": "model-00001-of-00003.safetensors",
 
886
  "language_model.model.layers.8.cross_attn.k_proj.weight": "model-00001-of-00003.safetensors",
 
 
887
  "language_model.model.layers.8.cross_attn.o_proj.weight": "model-00001-of-00003.safetensors",
 
888
  "language_model.model.layers.8.cross_attn.q_norm.weight": "model-00001-of-00003.safetensors",
 
889
  "language_model.model.layers.8.cross_attn.q_proj.weight": "model-00001-of-00003.safetensors",
 
 
890
  "language_model.model.layers.8.cross_attn.v_proj.weight": "model-00001-of-00003.safetensors",
 
891
  "language_model.model.layers.8.cross_attn_attn_gate": "model-00001-of-00003.safetensors",
892
  "language_model.model.layers.8.cross_attn_mlp_gate": "model-00001-of-00003.safetensors",
893
  "language_model.model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
@@ -901,17 +965,17 @@
901
  "language_model.model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
902
  "language_model.model.layers.8.mlp.up_proj.weight_scale": "model-00001-of-00003.safetensors",
903
  "language_model.model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
904
- "language_model.model.layers.9.input_layernorm.weight": "model-00002-of-00003.safetensors",
905
- "language_model.model.layers.9.mlp.down_proj.input_scale": "model-00002-of-00003.safetensors",
906
- "language_model.model.layers.9.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
907
- "language_model.model.layers.9.mlp.down_proj.weight_scale": "model-00002-of-00003.safetensors",
908
  "language_model.model.layers.9.mlp.gate_proj.input_scale": "model-00001-of-00003.safetensors",
909
  "language_model.model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
910
  "language_model.model.layers.9.mlp.gate_proj.weight_scale": "model-00001-of-00003.safetensors",
911
  "language_model.model.layers.9.mlp.up_proj.input_scale": "model-00001-of-00003.safetensors",
912
  "language_model.model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
913
  "language_model.model.layers.9.mlp.up_proj.weight_scale": "model-00001-of-00003.safetensors",
914
- "language_model.model.layers.9.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
915
  "language_model.model.layers.9.self_attn.k_proj.input_scale": "model-00001-of-00003.safetensors",
916
  "language_model.model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
917
  "language_model.model.layers.9.self_attn.k_proj.weight_scale": "model-00001-of-00003.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 12616291718
4
  },
5
  "weight_map": {
6
  "language_model.lm_head.weight": "model-00003-of-00003.safetensors",
 
64
  "language_model.model.layers.10.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
65
  "language_model.model.layers.10.mlp.up_proj.weight_scale": "model-00002-of-00003.safetensors",
66
  "language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
+ "language_model.model.layers.10.self_attn.k_proj.input_scale": "model-00001-of-00003.safetensors",
68
+ "language_model.model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
69
+ "language_model.model.layers.10.self_attn.k_proj.weight_scale": "model-00001-of-00003.safetensors",
70
+ "language_model.model.layers.10.self_attn.kv_scale": "model-00001-of-00003.safetensors",
71
  "language_model.model.layers.10.self_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
72
  "language_model.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
73
  "language_model.model.layers.10.self_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
74
+ "language_model.model.layers.10.self_attn.q_proj.input_scale": "model-00001-of-00003.safetensors",
75
+ "language_model.model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
76
+ "language_model.model.layers.10.self_attn.q_proj.weight_scale": "model-00001-of-00003.safetensors",
77
+ "language_model.model.layers.10.self_attn.v_proj.input_scale": "model-00001-of-00003.safetensors",
78
+ "language_model.model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
79
+ "language_model.model.layers.10.self_attn.v_proj.weight_scale": "model-00001-of-00003.safetensors",
80
  "language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
  "language_model.model.layers.11.mlp.down_proj.input_scale": "model-00002-of-00003.safetensors",
82
  "language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
 
126
  "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
127
  "language_model.model.layers.12.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
128
  "language_model.model.layers.13.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
129
+ "language_model.model.layers.13.cross_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
130
  "language_model.model.layers.13.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
131
+ "language_model.model.layers.13.cross_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
132
+ "language_model.model.layers.13.cross_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
133
  "language_model.model.layers.13.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
134
+ "language_model.model.layers.13.cross_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
135
  "language_model.model.layers.13.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
136
+ "language_model.model.layers.13.cross_attn.q_proj.input_scale": "model-00002-of-00003.safetensors",
137
  "language_model.model.layers.13.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
138
+ "language_model.model.layers.13.cross_attn.q_proj.weight_scale": "model-00002-of-00003.safetensors",
139
+ "language_model.model.layers.13.cross_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
140
  "language_model.model.layers.13.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
141
+ "language_model.model.layers.13.cross_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
142
  "language_model.model.layers.13.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
143
  "language_model.model.layers.13.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
144
  "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
 
249
  "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
250
  "language_model.model.layers.17.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
251
  "language_model.model.layers.18.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
252
+ "language_model.model.layers.18.cross_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
253
  "language_model.model.layers.18.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
254
+ "language_model.model.layers.18.cross_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
255
+ "language_model.model.layers.18.cross_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
256
  "language_model.model.layers.18.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
257
+ "language_model.model.layers.18.cross_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
258
  "language_model.model.layers.18.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
259
+ "language_model.model.layers.18.cross_attn.q_proj.input_scale": "model-00002-of-00003.safetensors",
260
  "language_model.model.layers.18.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
261
+ "language_model.model.layers.18.cross_attn.q_proj.weight_scale": "model-00002-of-00003.safetensors",
262
+ "language_model.model.layers.18.cross_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
263
  "language_model.model.layers.18.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
264
+ "language_model.model.layers.18.cross_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
265
  "language_model.model.layers.18.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
266
  "language_model.model.layers.18.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
267
  "language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
 
396
  "language_model.model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
397
  "language_model.model.layers.22.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
398
  "language_model.model.layers.23.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
399
+ "language_model.model.layers.23.cross_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
400
  "language_model.model.layers.23.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
401
+ "language_model.model.layers.23.cross_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
402
+ "language_model.model.layers.23.cross_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
403
  "language_model.model.layers.23.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
404
+ "language_model.model.layers.23.cross_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
405
  "language_model.model.layers.23.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
406
+ "language_model.model.layers.23.cross_attn.q_proj.input_scale": "model-00002-of-00003.safetensors",
407
  "language_model.model.layers.23.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
408
+ "language_model.model.layers.23.cross_attn.q_proj.weight_scale": "model-00002-of-00003.safetensors",
409
+ "language_model.model.layers.23.cross_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
410
  "language_model.model.layers.23.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
411
+ "language_model.model.layers.23.cross_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
412
  "language_model.model.layers.23.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
413
  "language_model.model.layers.23.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
414
  "language_model.model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors",
 
519
  "language_model.model.layers.27.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
520
  "language_model.model.layers.27.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
521
  "language_model.model.layers.28.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
522
+ "language_model.model.layers.28.cross_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
523
  "language_model.model.layers.28.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
524
+ "language_model.model.layers.28.cross_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
525
+ "language_model.model.layers.28.cross_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
526
  "language_model.model.layers.28.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
527
+ "language_model.model.layers.28.cross_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
528
  "language_model.model.layers.28.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
529
+ "language_model.model.layers.28.cross_attn.q_proj.input_scale": "model-00002-of-00003.safetensors",
530
  "language_model.model.layers.28.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
531
+ "language_model.model.layers.28.cross_attn.q_proj.weight_scale": "model-00002-of-00003.safetensors",
532
+ "language_model.model.layers.28.cross_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
533
  "language_model.model.layers.28.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
534
+ "language_model.model.layers.28.cross_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
535
  "language_model.model.layers.28.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
536
  "language_model.model.layers.28.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
537
  "language_model.model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors",
 
570
  "language_model.model.layers.29.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
571
  "language_model.model.layers.29.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
572
  "language_model.model.layers.3.cross_attn.k_norm.weight": "model-00001-of-00003.safetensors",
573
+ "language_model.model.layers.3.cross_attn.k_proj.input_scale": "model-00001-of-00003.safetensors",
574
  "language_model.model.layers.3.cross_attn.k_proj.weight": "model-00001-of-00003.safetensors",
575
+ "language_model.model.layers.3.cross_attn.k_proj.weight_scale": "model-00001-of-00003.safetensors",
576
+ "language_model.model.layers.3.cross_attn.o_proj.input_scale": "model-00001-of-00003.safetensors",
577
  "language_model.model.layers.3.cross_attn.o_proj.weight": "model-00001-of-00003.safetensors",
578
+ "language_model.model.layers.3.cross_attn.o_proj.weight_scale": "model-00001-of-00003.safetensors",
579
  "language_model.model.layers.3.cross_attn.q_norm.weight": "model-00001-of-00003.safetensors",
580
+ "language_model.model.layers.3.cross_attn.q_proj.input_scale": "model-00001-of-00003.safetensors",
581
  "language_model.model.layers.3.cross_attn.q_proj.weight": "model-00001-of-00003.safetensors",
582
+ "language_model.model.layers.3.cross_attn.q_proj.weight_scale": "model-00001-of-00003.safetensors",
583
+ "language_model.model.layers.3.cross_attn.v_proj.input_scale": "model-00001-of-00003.safetensors",
584
  "language_model.model.layers.3.cross_attn.v_proj.weight": "model-00001-of-00003.safetensors",
585
+ "language_model.model.layers.3.cross_attn.v_proj.weight_scale": "model-00001-of-00003.safetensors",
586
  "language_model.model.layers.3.cross_attn_attn_gate": "model-00001-of-00003.safetensors",
587
  "language_model.model.layers.3.cross_attn_mlp_gate": "model-00001-of-00003.safetensors",
588
  "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
 
620
  "language_model.model.layers.30.self_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
621
  "language_model.model.layers.30.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
622
  "language_model.model.layers.30.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
623
+ "language_model.model.layers.31.input_layernorm.weight": "model-00002-of-00003.safetensors",
624
+ "language_model.model.layers.31.mlp.down_proj.input_scale": "model-00002-of-00003.safetensors",
625
+ "language_model.model.layers.31.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
626
+ "language_model.model.layers.31.mlp.down_proj.weight_scale": "model-00002-of-00003.safetensors",
627
  "language_model.model.layers.31.mlp.gate_proj.input_scale": "model-00002-of-00003.safetensors",
628
  "language_model.model.layers.31.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
629
  "language_model.model.layers.31.mlp.gate_proj.weight_scale": "model-00002-of-00003.safetensors",
630
  "language_model.model.layers.31.mlp.up_proj.input_scale": "model-00002-of-00003.safetensors",
631
  "language_model.model.layers.31.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
632
  "language_model.model.layers.31.mlp.up_proj.weight_scale": "model-00002-of-00003.safetensors",
633
+ "language_model.model.layers.31.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
634
  "language_model.model.layers.31.self_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
635
  "language_model.model.layers.31.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
636
  "language_model.model.layers.31.self_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
 
644
  "language_model.model.layers.31.self_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
645
  "language_model.model.layers.31.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
646
  "language_model.model.layers.31.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
647
+ "language_model.model.layers.32.input_layernorm.weight": "model-00002-of-00003.safetensors",
648
+ "language_model.model.layers.32.mlp.down_proj.input_scale": "model-00002-of-00003.safetensors",
649
+ "language_model.model.layers.32.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
650
+ "language_model.model.layers.32.mlp.down_proj.weight_scale": "model-00002-of-00003.safetensors",
651
+ "language_model.model.layers.32.mlp.gate_proj.input_scale": "model-00002-of-00003.safetensors",
652
+ "language_model.model.layers.32.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
653
+ "language_model.model.layers.32.mlp.gate_proj.weight_scale": "model-00002-of-00003.safetensors",
654
+ "language_model.model.layers.32.mlp.up_proj.input_scale": "model-00002-of-00003.safetensors",
655
+ "language_model.model.layers.32.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
656
+ "language_model.model.layers.32.mlp.up_proj.weight_scale": "model-00002-of-00003.safetensors",
657
+ "language_model.model.layers.32.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
658
+ "language_model.model.layers.32.self_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
659
+ "language_model.model.layers.32.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
660
+ "language_model.model.layers.32.self_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
661
+ "language_model.model.layers.32.self_attn.kv_scale": "model-00002-of-00003.safetensors",
662
+ "language_model.model.layers.32.self_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
663
+ "language_model.model.layers.32.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
664
+ "language_model.model.layers.32.self_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
665
+ "language_model.model.layers.32.self_attn.q_proj.input_scale": "model-00002-of-00003.safetensors",
666
+ "language_model.model.layers.32.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
667
+ "language_model.model.layers.32.self_attn.q_proj.weight_scale": "model-00002-of-00003.safetensors",
668
+ "language_model.model.layers.32.self_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
669
+ "language_model.model.layers.32.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
670
+ "language_model.model.layers.32.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
671
  "language_model.model.layers.33.cross_attn.k_norm.weight": "model-00003-of-00003.safetensors",
672
+ "language_model.model.layers.33.cross_attn.k_proj.input_scale": "model-00003-of-00003.safetensors",
673
  "language_model.model.layers.33.cross_attn.k_proj.weight": "model-00003-of-00003.safetensors",
674
+ "language_model.model.layers.33.cross_attn.k_proj.weight_scale": "model-00003-of-00003.safetensors",
675
+ "language_model.model.layers.33.cross_attn.o_proj.input_scale": "model-00003-of-00003.safetensors",
676
  "language_model.model.layers.33.cross_attn.o_proj.weight": "model-00003-of-00003.safetensors",
677
+ "language_model.model.layers.33.cross_attn.o_proj.weight_scale": "model-00003-of-00003.safetensors",
678
  "language_model.model.layers.33.cross_attn.q_norm.weight": "model-00003-of-00003.safetensors",
679
+ "language_model.model.layers.33.cross_attn.q_proj.input_scale": "model-00003-of-00003.safetensors",
680
  "language_model.model.layers.33.cross_attn.q_proj.weight": "model-00003-of-00003.safetensors",
681
+ "language_model.model.layers.33.cross_attn.q_proj.weight_scale": "model-00003-of-00003.safetensors",
682
+ "language_model.model.layers.33.cross_attn.v_proj.input_scale": "model-00003-of-00003.safetensors",
683
  "language_model.model.layers.33.cross_attn.v_proj.weight": "model-00003-of-00003.safetensors",
684
+ "language_model.model.layers.33.cross_attn.v_proj.weight_scale": "model-00003-of-00003.safetensors",
685
+ "language_model.model.layers.33.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
686
+ "language_model.model.layers.33.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
687
  "language_model.model.layers.33.input_layernorm.weight": "model-00003-of-00003.safetensors",
688
  "language_model.model.layers.33.mlp.down_proj.input_scale": "model-00003-of-00003.safetensors",
689
  "language_model.model.layers.33.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
 
792
  "language_model.model.layers.37.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
793
  "language_model.model.layers.37.self_attn.v_proj.weight_scale": "model-00003-of-00003.safetensors",
794
  "language_model.model.layers.38.cross_attn.k_norm.weight": "model-00003-of-00003.safetensors",
795
+ "language_model.model.layers.38.cross_attn.k_proj.input_scale": "model-00003-of-00003.safetensors",
796
  "language_model.model.layers.38.cross_attn.k_proj.weight": "model-00003-of-00003.safetensors",
797
+ "language_model.model.layers.38.cross_attn.k_proj.weight_scale": "model-00003-of-00003.safetensors",
798
+ "language_model.model.layers.38.cross_attn.o_proj.input_scale": "model-00003-of-00003.safetensors",
799
  "language_model.model.layers.38.cross_attn.o_proj.weight": "model-00003-of-00003.safetensors",
800
+ "language_model.model.layers.38.cross_attn.o_proj.weight_scale": "model-00003-of-00003.safetensors",
801
  "language_model.model.layers.38.cross_attn.q_norm.weight": "model-00003-of-00003.safetensors",
802
+ "language_model.model.layers.38.cross_attn.q_proj.input_scale": "model-00003-of-00003.safetensors",
803
  "language_model.model.layers.38.cross_attn.q_proj.weight": "model-00003-of-00003.safetensors",
804
+ "language_model.model.layers.38.cross_attn.q_proj.weight_scale": "model-00003-of-00003.safetensors",
805
+ "language_model.model.layers.38.cross_attn.v_proj.input_scale": "model-00003-of-00003.safetensors",
806
  "language_model.model.layers.38.cross_attn.v_proj.weight": "model-00003-of-00003.safetensors",
807
+ "language_model.model.layers.38.cross_attn.v_proj.weight_scale": "model-00003-of-00003.safetensors",
808
  "language_model.model.layers.38.cross_attn_attn_gate": "model-00003-of-00003.safetensors",
809
  "language_model.model.layers.38.cross_attn_mlp_gate": "model-00003-of-00003.safetensors",
810
  "language_model.model.layers.38.input_layernorm.weight": "model-00003-of-00003.safetensors",
 
939
  "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
940
  "language_model.model.layers.7.self_attn.v_proj.weight_scale": "model-00001-of-00003.safetensors",
941
  "language_model.model.layers.8.cross_attn.k_norm.weight": "model-00001-of-00003.safetensors",
942
+ "language_model.model.layers.8.cross_attn.k_proj.input_scale": "model-00001-of-00003.safetensors",
943
  "language_model.model.layers.8.cross_attn.k_proj.weight": "model-00001-of-00003.safetensors",
944
+ "language_model.model.layers.8.cross_attn.k_proj.weight_scale": "model-00001-of-00003.safetensors",
945
+ "language_model.model.layers.8.cross_attn.o_proj.input_scale": "model-00001-of-00003.safetensors",
946
  "language_model.model.layers.8.cross_attn.o_proj.weight": "model-00001-of-00003.safetensors",
947
+ "language_model.model.layers.8.cross_attn.o_proj.weight_scale": "model-00001-of-00003.safetensors",
948
  "language_model.model.layers.8.cross_attn.q_norm.weight": "model-00001-of-00003.safetensors",
949
+ "language_model.model.layers.8.cross_attn.q_proj.input_scale": "model-00001-of-00003.safetensors",
950
  "language_model.model.layers.8.cross_attn.q_proj.weight": "model-00001-of-00003.safetensors",
951
+ "language_model.model.layers.8.cross_attn.q_proj.weight_scale": "model-00001-of-00003.safetensors",
952
+ "language_model.model.layers.8.cross_attn.v_proj.input_scale": "model-00001-of-00003.safetensors",
953
  "language_model.model.layers.8.cross_attn.v_proj.weight": "model-00001-of-00003.safetensors",
954
+ "language_model.model.layers.8.cross_attn.v_proj.weight_scale": "model-00001-of-00003.safetensors",
955
  "language_model.model.layers.8.cross_attn_attn_gate": "model-00001-of-00003.safetensors",
956
  "language_model.model.layers.8.cross_attn_mlp_gate": "model-00001-of-00003.safetensors",
957
  "language_model.model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
 
965
  "language_model.model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
966
  "language_model.model.layers.8.mlp.up_proj.weight_scale": "model-00001-of-00003.safetensors",
967
  "language_model.model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
968
+ "language_model.model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
969
+ "language_model.model.layers.9.mlp.down_proj.input_scale": "model-00001-of-00003.safetensors",
970
+ "language_model.model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
971
+ "language_model.model.layers.9.mlp.down_proj.weight_scale": "model-00001-of-00003.safetensors",
972
  "language_model.model.layers.9.mlp.gate_proj.input_scale": "model-00001-of-00003.safetensors",
973
  "language_model.model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
974
  "language_model.model.layers.9.mlp.gate_proj.weight_scale": "model-00001-of-00003.safetensors",
975
  "language_model.model.layers.9.mlp.up_proj.input_scale": "model-00001-of-00003.safetensors",
976
  "language_model.model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
977
  "language_model.model.layers.9.mlp.up_proj.weight_scale": "model-00001-of-00003.safetensors",
978
+ "language_model.model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
979
  "language_model.model.layers.9.self_attn.k_proj.input_scale": "model-00001-of-00003.safetensors",
980
  "language_model.model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
981
  "language_model.model.layers.9.self_attn.k_proj.weight_scale": "model-00001-of-00003.safetensors",