haoyang-amd committed on
Commit
5afe644
·
verified ·
1 Parent(s): f9c4890

update files

Browse files
chat_template.json ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ {
2
+ "chat_template": "{{- bos_token }}\n{%- if custom_tools is defined %}\n {%- set tools = custom_tools %}\n{%- endif %}\n{%- if not tools_in_user_message is defined %}\n {%- set tools_in_user_message = true %}\n{%- endif %}\n{%- if not date_string is defined %}\n {%- if strftime_now is defined %}\n {%- set date_string = strftime_now(\"%d %b %Y\") %}\n {%- else %}\n {%- set date_string = \"26 Jul 2024\" %}\n {%- endif %}\n{%- endif %}\n{%- if not tools is defined %}\n {%- set tools = none %}\n{%- endif %}\n\n{#- This block extracts the system message, so we can slot it into the right place. #}\n{%- if messages[0]['role'] == 'system' %}\n {%- set system_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n{%- else %}\n {%- set system_message = \"\" %}\n{%- endif %}\n\n{#- Find out if there are any images #}\n{% set image_ns = namespace(has_images=false) %} \n{%- for message in messages %}\n {%- for content in message['content'] %}\n {%- if content['type'] == 'image' %}\n {%- set image_ns.has_images = true %}\n {%- endif %}\n {%- endfor %}\n{%- endfor %}\n\n{#- Error out if there are images and system message #}\n{%- if image_ns.has_images and not system_message == \"\" %}\n {{- raise_exception(\"Prompting with images is incompatible with system messages.\") }}\n{%- endif %}\n\n{#- System message if there are no images #}\n{%- if not image_ns.has_images %}\n {{- \"<|start_header_id|>system<|end_header_id|>\\n\\n\" }}\n {%- if tools is not none %}\n {{- \"Environment: ipython\\n\" }}\n {%- endif %}\n {{- \"Cutting Knowledge Date: December 2023\\n\" }}\n {{- \"Today Date: \" + date_string + \"\\n\\n\" }}\n {%- if tools is not none and not tools_in_user_message %}\n {{- \"You have access to the following functions. To call a function, please respond with JSON for a function call.\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {%- endif %}\n {{- system_message }}\n {{- \"<|eot_id|>\" }}\n{%- endif %}\n\n{#- Custom tools are passed in a user message with some extra guidance #}\n{%- if tools_in_user_message and not tools is none %}\n {#- Extract the first user message so we can plug it in here #}\n {%- if messages | length != 0 %}\n {%- set first_user_message = messages[0]['content']|trim %}\n {%- set messages = messages[1:] %}\n {%- else %}\n {{- raise_exception(\"Cannot put tools in the first user message when there's no first user message!\") }}\n{%- endif %}\n {{- '<|start_header_id|>user<|end_header_id|>\\n\\n' -}}\n {{- \"Given the following functions, please respond with a JSON for a function call \" }}\n {{- \"with its proper arguments that best answers the given prompt.\\n\\n\" }}\n {{- 'Respond in the format {\"name\": function name, \"parameters\": dictionary of argument name and its value}.' 
}}\n {{- \"Do not use variables.\\n\\n\" }}\n {%- for t in tools %}\n {{- t | tojson(indent=4) }}\n {{- \"\\n\\n\" }}\n {%- endfor %}\n {{- first_user_message + \"<|eot_id|>\"}}\n{%- endif %}\n\n{%- for message in messages %}\n {%- if not (message.role == 'ipython' or message.role == 'tool' or 'tool_calls' in message) %}\n {{- '<|start_header_id|>' + message['role'] + '<|end_header_id|>\\n\\n' }}\n {%- if message['content'] is string %}\n {{- message['content'] }}\n {%- else %}\n {%- for content in message['content'] %}\n {%- if content['type'] == 'image' %}\n {{- '<|image|>' }}\n {%- elif content['type'] == 'text' %}\n {{- content['text'] }}\n {%- endif %}\n {%- endfor %}\n {%- endif %}\n {{- '<|eot_id|>' }}\n {%- elif 'tool_calls' in message %}\n {%- if not message.tool_calls|length == 1 %}\n {{- raise_exception(\"This model only supports single tool-calls at once!\") }}\n {%- endif %}\n {%- set tool_call = message.tool_calls[0].function %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' -}}\n {{- '{\"name\": \"' + tool_call.name + '\", ' }}\n {{- '\"parameters\": ' }}\n {{- tool_call.arguments | tojson }}\n {{- \"}\" }}\n {{- \"<|eot_id|>\" }}\n {%- elif message.role == \"tool\" or message.role == \"ipython\" %}\n {{- \"<|start_header_id|>ipython<|end_header_id|>\\n\\n\" }}\n {%- if message.content is mapping or message.content is iterable %}\n {{- message.content | tojson }}\n {%- else %}\n {{- message.content }}\n {%- endif %}\n {{- \"<|eot_id|>\" }}\n {%- endif %}\n{%- endfor %}\n{%- if add_generation_prompt %}\n {{- '<|start_header_id|>assistant<|end_header_id|>\\n\\n' }}\n{%- endif %}\n"
3
+ }
config.json CHANGED
@@ -1,5 +1,5 @@
1
  {
2
- "_name_or_path": "/model_path/Llama-3.2-11B-Vision-Instruct/",
3
  "architectures": [
4
  "MllamaForConditionalGeneration"
5
  ],
@@ -8,7 +8,10 @@
8
  "quantization_config": {
9
  "activation_scheme": "static",
10
  "ignored_layers": [
11
- "lm_head"
 
 
 
12
  ],
13
  "kv_cache_scheme": "static",
14
  "quant_method": "fp8"
 
1
  {
2
+ "_name_or_path": "/model_path/Llama-3.2-11B-Vision-Instruct",
3
  "architectures": [
4
  "MllamaForConditionalGeneration"
5
  ],
 
8
  "quantization_config": {
9
  "activation_scheme": "static",
10
  "ignored_layers": [
11
+ "*lm_head",
12
+ "*cross_attn*",
13
+ "vision_model*",
14
+ "multi_modal_projector"
15
  ],
16
  "kv_cache_scheme": "static",
17
  "quant_method": "fp8"
model-00001-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7913886bb1471568324e41653aad932101a8bc86d76b56ff71dade79b0f9afd2
3
- size 4984341162
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3a3123a5a86e315ff58920026ef7a912fb13788747d38acbbbec47012e89052b
3
+ size 4984320774
model-00002-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:71f3d10886ee124b629c0813a3d01b675650fe49d085fdce7a1cd09bae34292b
3
- size 4991669352
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:da57cc3b84a544a430a742177e2ba98d5870cba85bf0cb7f0c3974f31e73d715
3
+ size 4966480928
model-00003-of-00003.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d70e8659c127002e674b5c0ab5f56bbfa6b6bd49ed8cade7fd9169548bdbb9bc
3
- size 2640468236
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8a6c5ade219cab320a5fef98063d814d9c5005fe4c015f9d75d0c9a6d91bd6b5
3
+ size 3001212912
model.safetensors.index.json CHANGED
@@ -1,6 +1,6 @@
1
  {
2
  "metadata": {
3
- "total_size": 12616291750
4
  },
5
  "weight_map": {
6
  "language_model.lm_head.weight": "model-00003-of-00003.safetensors",
@@ -64,19 +64,19 @@
64
  "language_model.model.layers.10.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
65
  "language_model.model.layers.10.mlp.up_proj.weight_scale": "model-00002-of-00003.safetensors",
66
  "language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
- "language_model.model.layers.10.self_attn.k_proj.input_scale": "model-00001-of-00003.safetensors",
68
- "language_model.model.layers.10.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
69
- "language_model.model.layers.10.self_attn.k_proj.weight_scale": "model-00001-of-00003.safetensors",
70
- "language_model.model.layers.10.self_attn.kv_scale": "model-00001-of-00003.safetensors",
71
  "language_model.model.layers.10.self_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
72
  "language_model.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
73
  "language_model.model.layers.10.self_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
74
- "language_model.model.layers.10.self_attn.q_proj.input_scale": "model-00001-of-00003.safetensors",
75
- "language_model.model.layers.10.self_attn.q_proj.weight": "model-00001-of-00003.safetensors",
76
- "language_model.model.layers.10.self_attn.q_proj.weight_scale": "model-00001-of-00003.safetensors",
77
- "language_model.model.layers.10.self_attn.v_proj.input_scale": "model-00001-of-00003.safetensors",
78
- "language_model.model.layers.10.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
79
- "language_model.model.layers.10.self_attn.v_proj.weight_scale": "model-00001-of-00003.safetensors",
80
  "language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
  "language_model.model.layers.11.mlp.down_proj.input_scale": "model-00002-of-00003.safetensors",
82
  "language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
@@ -126,20 +126,11 @@
126
  "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
127
  "language_model.model.layers.12.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
128
  "language_model.model.layers.13.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
129
- "language_model.model.layers.13.cross_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
130
  "language_model.model.layers.13.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
131
- "language_model.model.layers.13.cross_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
132
- "language_model.model.layers.13.cross_attn.kv_scale": "model-00002-of-00003.safetensors",
133
- "language_model.model.layers.13.cross_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
134
  "language_model.model.layers.13.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
135
- "language_model.model.layers.13.cross_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
136
  "language_model.model.layers.13.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
137
- "language_model.model.layers.13.cross_attn.q_proj.input_scale": "model-00002-of-00003.safetensors",
138
  "language_model.model.layers.13.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
139
- "language_model.model.layers.13.cross_attn.q_proj.weight_scale": "model-00002-of-00003.safetensors",
140
- "language_model.model.layers.13.cross_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
141
  "language_model.model.layers.13.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
142
- "language_model.model.layers.13.cross_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
143
  "language_model.model.layers.13.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
144
  "language_model.model.layers.13.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
145
  "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
@@ -250,20 +241,11 @@
250
  "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
251
  "language_model.model.layers.17.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
252
  "language_model.model.layers.18.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
253
- "language_model.model.layers.18.cross_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
254
  "language_model.model.layers.18.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
255
- "language_model.model.layers.18.cross_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
256
- "language_model.model.layers.18.cross_attn.kv_scale": "model-00002-of-00003.safetensors",
257
- "language_model.model.layers.18.cross_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
258
  "language_model.model.layers.18.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
259
- "language_model.model.layers.18.cross_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
260
  "language_model.model.layers.18.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
261
- "language_model.model.layers.18.cross_attn.q_proj.input_scale": "model-00002-of-00003.safetensors",
262
  "language_model.model.layers.18.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
263
- "language_model.model.layers.18.cross_attn.q_proj.weight_scale": "model-00002-of-00003.safetensors",
264
- "language_model.model.layers.18.cross_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
265
  "language_model.model.layers.18.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
266
- "language_model.model.layers.18.cross_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
267
  "language_model.model.layers.18.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
268
  "language_model.model.layers.18.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
269
  "language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
@@ -398,20 +380,11 @@
398
  "language_model.model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
399
  "language_model.model.layers.22.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
400
  "language_model.model.layers.23.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
401
- "language_model.model.layers.23.cross_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
402
  "language_model.model.layers.23.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
403
- "language_model.model.layers.23.cross_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
404
- "language_model.model.layers.23.cross_attn.kv_scale": "model-00002-of-00003.safetensors",
405
- "language_model.model.layers.23.cross_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
406
  "language_model.model.layers.23.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
407
- "language_model.model.layers.23.cross_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
408
  "language_model.model.layers.23.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
409
- "language_model.model.layers.23.cross_attn.q_proj.input_scale": "model-00002-of-00003.safetensors",
410
  "language_model.model.layers.23.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
411
- "language_model.model.layers.23.cross_attn.q_proj.weight_scale": "model-00002-of-00003.safetensors",
412
- "language_model.model.layers.23.cross_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
413
  "language_model.model.layers.23.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
414
- "language_model.model.layers.23.cross_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
415
  "language_model.model.layers.23.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
416
  "language_model.model.layers.23.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
417
  "language_model.model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors",
@@ -522,20 +495,11 @@
522
  "language_model.model.layers.27.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
523
  "language_model.model.layers.27.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
524
  "language_model.model.layers.28.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
525
- "language_model.model.layers.28.cross_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
526
  "language_model.model.layers.28.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
527
- "language_model.model.layers.28.cross_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
528
- "language_model.model.layers.28.cross_attn.kv_scale": "model-00002-of-00003.safetensors",
529
- "language_model.model.layers.28.cross_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
530
  "language_model.model.layers.28.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
531
- "language_model.model.layers.28.cross_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
532
  "language_model.model.layers.28.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
533
- "language_model.model.layers.28.cross_attn.q_proj.input_scale": "model-00002-of-00003.safetensors",
534
  "language_model.model.layers.28.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
535
- "language_model.model.layers.28.cross_attn.q_proj.weight_scale": "model-00002-of-00003.safetensors",
536
- "language_model.model.layers.28.cross_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
537
  "language_model.model.layers.28.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
538
- "language_model.model.layers.28.cross_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
539
  "language_model.model.layers.28.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
540
  "language_model.model.layers.28.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
541
  "language_model.model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors",
@@ -574,20 +538,11 @@
574
  "language_model.model.layers.29.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
575
  "language_model.model.layers.29.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
576
  "language_model.model.layers.3.cross_attn.k_norm.weight": "model-00001-of-00003.safetensors",
577
- "language_model.model.layers.3.cross_attn.k_proj.input_scale": "model-00001-of-00003.safetensors",
578
  "language_model.model.layers.3.cross_attn.k_proj.weight": "model-00001-of-00003.safetensors",
579
- "language_model.model.layers.3.cross_attn.k_proj.weight_scale": "model-00001-of-00003.safetensors",
580
- "language_model.model.layers.3.cross_attn.kv_scale": "model-00001-of-00003.safetensors",
581
- "language_model.model.layers.3.cross_attn.o_proj.input_scale": "model-00001-of-00003.safetensors",
582
  "language_model.model.layers.3.cross_attn.o_proj.weight": "model-00001-of-00003.safetensors",
583
- "language_model.model.layers.3.cross_attn.o_proj.weight_scale": "model-00001-of-00003.safetensors",
584
  "language_model.model.layers.3.cross_attn.q_norm.weight": "model-00001-of-00003.safetensors",
585
- "language_model.model.layers.3.cross_attn.q_proj.input_scale": "model-00001-of-00003.safetensors",
586
  "language_model.model.layers.3.cross_attn.q_proj.weight": "model-00001-of-00003.safetensors",
587
- "language_model.model.layers.3.cross_attn.q_proj.weight_scale": "model-00001-of-00003.safetensors",
588
- "language_model.model.layers.3.cross_attn.v_proj.input_scale": "model-00001-of-00003.safetensors",
589
  "language_model.model.layers.3.cross_attn.v_proj.weight": "model-00001-of-00003.safetensors",
590
- "language_model.model.layers.3.cross_attn.v_proj.weight_scale": "model-00001-of-00003.safetensors",
591
  "language_model.model.layers.3.cross_attn_attn_gate": "model-00001-of-00003.safetensors",
592
  "language_model.model.layers.3.cross_attn_mlp_gate": "model-00001-of-00003.safetensors",
593
  "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
@@ -625,17 +580,17 @@
625
  "language_model.model.layers.30.self_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
626
  "language_model.model.layers.30.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
627
  "language_model.model.layers.30.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
628
- "language_model.model.layers.31.input_layernorm.weight": "model-00002-of-00003.safetensors",
629
- "language_model.model.layers.31.mlp.down_proj.input_scale": "model-00002-of-00003.safetensors",
630
- "language_model.model.layers.31.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
631
- "language_model.model.layers.31.mlp.down_proj.weight_scale": "model-00002-of-00003.safetensors",
632
  "language_model.model.layers.31.mlp.gate_proj.input_scale": "model-00002-of-00003.safetensors",
633
  "language_model.model.layers.31.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
634
  "language_model.model.layers.31.mlp.gate_proj.weight_scale": "model-00002-of-00003.safetensors",
635
  "language_model.model.layers.31.mlp.up_proj.input_scale": "model-00002-of-00003.safetensors",
636
  "language_model.model.layers.31.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
637
  "language_model.model.layers.31.mlp.up_proj.weight_scale": "model-00002-of-00003.safetensors",
638
- "language_model.model.layers.31.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
639
  "language_model.model.layers.31.self_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
640
  "language_model.model.layers.31.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
641
  "language_model.model.layers.31.self_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
@@ -649,47 +604,38 @@
649
  "language_model.model.layers.31.self_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
650
  "language_model.model.layers.31.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
651
  "language_model.model.layers.31.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
652
- "language_model.model.layers.32.input_layernorm.weight": "model-00002-of-00003.safetensors",
653
- "language_model.model.layers.32.mlp.down_proj.input_scale": "model-00002-of-00003.safetensors",
654
- "language_model.model.layers.32.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
655
- "language_model.model.layers.32.mlp.down_proj.weight_scale": "model-00002-of-00003.safetensors",
656
- "language_model.model.layers.32.mlp.gate_proj.input_scale": "model-00002-of-00003.safetensors",
657
- "language_model.model.layers.32.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
658
- "language_model.model.layers.32.mlp.gate_proj.weight_scale": "model-00002-of-00003.safetensors",
659
- "language_model.model.layers.32.mlp.up_proj.input_scale": "model-00002-of-00003.safetensors",
660
- "language_model.model.layers.32.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
661
- "language_model.model.layers.32.mlp.up_proj.weight_scale": "model-00002-of-00003.safetensors",
662
- "language_model.model.layers.32.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
663
- "language_model.model.layers.32.self_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
664
- "language_model.model.layers.32.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
665
- "language_model.model.layers.32.self_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
666
- "language_model.model.layers.32.self_attn.kv_scale": "model-00002-of-00003.safetensors",
667
- "language_model.model.layers.32.self_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
668
- "language_model.model.layers.32.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
669
- "language_model.model.layers.32.self_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
670
- "language_model.model.layers.32.self_attn.q_proj.input_scale": "model-00002-of-00003.safetensors",
671
- "language_model.model.layers.32.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
672
- "language_model.model.layers.32.self_attn.q_proj.weight_scale": "model-00002-of-00003.safetensors",
673
- "language_model.model.layers.32.self_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
674
- "language_model.model.layers.32.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
675
- "language_model.model.layers.32.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
676
  "language_model.model.layers.33.cross_attn.k_norm.weight": "model-00003-of-00003.safetensors",
677
- "language_model.model.layers.33.cross_attn.k_proj.input_scale": "model-00003-of-00003.safetensors",
678
  "language_model.model.layers.33.cross_attn.k_proj.weight": "model-00003-of-00003.safetensors",
679
- "language_model.model.layers.33.cross_attn.k_proj.weight_scale": "model-00003-of-00003.safetensors",
680
- "language_model.model.layers.33.cross_attn.kv_scale": "model-00002-of-00003.safetensors",
681
- "language_model.model.layers.33.cross_attn.o_proj.input_scale": "model-00003-of-00003.safetensors",
682
  "language_model.model.layers.33.cross_attn.o_proj.weight": "model-00003-of-00003.safetensors",
683
- "language_model.model.layers.33.cross_attn.o_proj.weight_scale": "model-00003-of-00003.safetensors",
684
  "language_model.model.layers.33.cross_attn.q_norm.weight": "model-00003-of-00003.safetensors",
685
- "language_model.model.layers.33.cross_attn.q_proj.input_scale": "model-00003-of-00003.safetensors",
686
  "language_model.model.layers.33.cross_attn.q_proj.weight": "model-00003-of-00003.safetensors",
687
- "language_model.model.layers.33.cross_attn.q_proj.weight_scale": "model-00003-of-00003.safetensors",
688
- "language_model.model.layers.33.cross_attn.v_proj.input_scale": "model-00003-of-00003.safetensors",
689
  "language_model.model.layers.33.cross_attn.v_proj.weight": "model-00003-of-00003.safetensors",
690
- "language_model.model.layers.33.cross_attn.v_proj.weight_scale": "model-00003-of-00003.safetensors",
691
- "language_model.model.layers.33.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
692
- "language_model.model.layers.33.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
693
  "language_model.model.layers.33.input_layernorm.weight": "model-00003-of-00003.safetensors",
694
  "language_model.model.layers.33.mlp.down_proj.input_scale": "model-00003-of-00003.safetensors",
695
  "language_model.model.layers.33.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
@@ -798,20 +744,11 @@
798
  "language_model.model.layers.37.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
799
  "language_model.model.layers.37.self_attn.v_proj.weight_scale": "model-00003-of-00003.safetensors",
800
  "language_model.model.layers.38.cross_attn.k_norm.weight": "model-00003-of-00003.safetensors",
801
- "language_model.model.layers.38.cross_attn.k_proj.input_scale": "model-00003-of-00003.safetensors",
802
  "language_model.model.layers.38.cross_attn.k_proj.weight": "model-00003-of-00003.safetensors",
803
- "language_model.model.layers.38.cross_attn.k_proj.weight_scale": "model-00003-of-00003.safetensors",
804
- "language_model.model.layers.38.cross_attn.kv_scale": "model-00003-of-00003.safetensors",
805
- "language_model.model.layers.38.cross_attn.o_proj.input_scale": "model-00003-of-00003.safetensors",
806
  "language_model.model.layers.38.cross_attn.o_proj.weight": "model-00003-of-00003.safetensors",
807
- "language_model.model.layers.38.cross_attn.o_proj.weight_scale": "model-00003-of-00003.safetensors",
808
  "language_model.model.layers.38.cross_attn.q_norm.weight": "model-00003-of-00003.safetensors",
809
- "language_model.model.layers.38.cross_attn.q_proj.input_scale": "model-00003-of-00003.safetensors",
810
  "language_model.model.layers.38.cross_attn.q_proj.weight": "model-00003-of-00003.safetensors",
811
- "language_model.model.layers.38.cross_attn.q_proj.weight_scale": "model-00003-of-00003.safetensors",
812
- "language_model.model.layers.38.cross_attn.v_proj.input_scale": "model-00003-of-00003.safetensors",
813
  "language_model.model.layers.38.cross_attn.v_proj.weight": "model-00003-of-00003.safetensors",
814
- "language_model.model.layers.38.cross_attn.v_proj.weight_scale": "model-00003-of-00003.safetensors",
815
  "language_model.model.layers.38.cross_attn_attn_gate": "model-00003-of-00003.safetensors",
816
  "language_model.model.layers.38.cross_attn_mlp_gate": "model-00003-of-00003.safetensors",
817
  "language_model.model.layers.38.input_layernorm.weight": "model-00003-of-00003.safetensors",
@@ -946,20 +883,11 @@
946
  "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
947
  "language_model.model.layers.7.self_attn.v_proj.weight_scale": "model-00001-of-00003.safetensors",
948
  "language_model.model.layers.8.cross_attn.k_norm.weight": "model-00001-of-00003.safetensors",
949
- "language_model.model.layers.8.cross_attn.k_proj.input_scale": "model-00001-of-00003.safetensors",
950
  "language_model.model.layers.8.cross_attn.k_proj.weight": "model-00001-of-00003.safetensors",
951
- "language_model.model.layers.8.cross_attn.k_proj.weight_scale": "model-00001-of-00003.safetensors",
952
- "language_model.model.layers.8.cross_attn.kv_scale": "model-00001-of-00003.safetensors",
953
- "language_model.model.layers.8.cross_attn.o_proj.input_scale": "model-00001-of-00003.safetensors",
954
  "language_model.model.layers.8.cross_attn.o_proj.weight": "model-00001-of-00003.safetensors",
955
- "language_model.model.layers.8.cross_attn.o_proj.weight_scale": "model-00001-of-00003.safetensors",
956
  "language_model.model.layers.8.cross_attn.q_norm.weight": "model-00001-of-00003.safetensors",
957
- "language_model.model.layers.8.cross_attn.q_proj.input_scale": "model-00001-of-00003.safetensors",
958
  "language_model.model.layers.8.cross_attn.q_proj.weight": "model-00001-of-00003.safetensors",
959
- "language_model.model.layers.8.cross_attn.q_proj.weight_scale": "model-00001-of-00003.safetensors",
960
- "language_model.model.layers.8.cross_attn.v_proj.input_scale": "model-00001-of-00003.safetensors",
961
  "language_model.model.layers.8.cross_attn.v_proj.weight": "model-00001-of-00003.safetensors",
962
- "language_model.model.layers.8.cross_attn.v_proj.weight_scale": "model-00001-of-00003.safetensors",
963
  "language_model.model.layers.8.cross_attn_attn_gate": "model-00001-of-00003.safetensors",
964
  "language_model.model.layers.8.cross_attn_mlp_gate": "model-00001-of-00003.safetensors",
965
  "language_model.model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
@@ -973,17 +901,17 @@
973
  "language_model.model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
974
  "language_model.model.layers.8.mlp.up_proj.weight_scale": "model-00001-of-00003.safetensors",
975
  "language_model.model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
976
- "language_model.model.layers.9.input_layernorm.weight": "model-00001-of-00003.safetensors",
977
- "language_model.model.layers.9.mlp.down_proj.input_scale": "model-00001-of-00003.safetensors",
978
- "language_model.model.layers.9.mlp.down_proj.weight": "model-00001-of-00003.safetensors",
979
- "language_model.model.layers.9.mlp.down_proj.weight_scale": "model-00001-of-00003.safetensors",
980
  "language_model.model.layers.9.mlp.gate_proj.input_scale": "model-00001-of-00003.safetensors",
981
  "language_model.model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
982
  "language_model.model.layers.9.mlp.gate_proj.weight_scale": "model-00001-of-00003.safetensors",
983
  "language_model.model.layers.9.mlp.up_proj.input_scale": "model-00001-of-00003.safetensors",
984
  "language_model.model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
985
  "language_model.model.layers.9.mlp.up_proj.weight_scale": "model-00001-of-00003.safetensors",
986
- "language_model.model.layers.9.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
987
  "language_model.model.layers.9.self_attn.k_proj.input_scale": "model-00001-of-00003.safetensors",
988
  "language_model.model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
989
  "language_model.model.layers.9.self_attn.k_proj.weight_scale": "model-00001-of-00003.safetensors",
 
1
  {
2
  "metadata": {
3
+ "total_size": 12951835782
4
  },
5
  "weight_map": {
6
  "language_model.lm_head.weight": "model-00003-of-00003.safetensors",
 
64
  "language_model.model.layers.10.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
65
  "language_model.model.layers.10.mlp.up_proj.weight_scale": "model-00002-of-00003.safetensors",
66
  "language_model.model.layers.10.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
67
+ "language_model.model.layers.10.self_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
68
+ "language_model.model.layers.10.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
69
+ "language_model.model.layers.10.self_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
70
+ "language_model.model.layers.10.self_attn.kv_scale": "model-00002-of-00003.safetensors",
71
  "language_model.model.layers.10.self_attn.o_proj.input_scale": "model-00002-of-00003.safetensors",
72
  "language_model.model.layers.10.self_attn.o_proj.weight": "model-00002-of-00003.safetensors",
73
  "language_model.model.layers.10.self_attn.o_proj.weight_scale": "model-00002-of-00003.safetensors",
74
+ "language_model.model.layers.10.self_attn.q_proj.input_scale": "model-00002-of-00003.safetensors",
75
+ "language_model.model.layers.10.self_attn.q_proj.weight": "model-00002-of-00003.safetensors",
76
+ "language_model.model.layers.10.self_attn.q_proj.weight_scale": "model-00002-of-00003.safetensors",
77
+ "language_model.model.layers.10.self_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
78
+ "language_model.model.layers.10.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
79
+ "language_model.model.layers.10.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
80
  "language_model.model.layers.11.input_layernorm.weight": "model-00002-of-00003.safetensors",
81
  "language_model.model.layers.11.mlp.down_proj.input_scale": "model-00002-of-00003.safetensors",
82
  "language_model.model.layers.11.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
 
126
  "language_model.model.layers.12.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
127
  "language_model.model.layers.12.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
128
  "language_model.model.layers.13.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
 
129
  "language_model.model.layers.13.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
 
 
 
130
  "language_model.model.layers.13.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
 
131
  "language_model.model.layers.13.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
 
132
  "language_model.model.layers.13.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
 
 
133
  "language_model.model.layers.13.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
 
134
  "language_model.model.layers.13.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
135
  "language_model.model.layers.13.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
136
  "language_model.model.layers.13.input_layernorm.weight": "model-00002-of-00003.safetensors",
 
241
  "language_model.model.layers.17.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
242
  "language_model.model.layers.17.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
243
  "language_model.model.layers.18.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
 
244
  "language_model.model.layers.18.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
 
 
 
245
  "language_model.model.layers.18.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
 
246
  "language_model.model.layers.18.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
 
247
  "language_model.model.layers.18.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
 
 
248
  "language_model.model.layers.18.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
 
249
  "language_model.model.layers.18.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
250
  "language_model.model.layers.18.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
251
  "language_model.model.layers.18.input_layernorm.weight": "model-00002-of-00003.safetensors",
 
380
  "language_model.model.layers.22.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
381
  "language_model.model.layers.22.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
382
  "language_model.model.layers.23.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
 
383
  "language_model.model.layers.23.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
 
 
 
384
  "language_model.model.layers.23.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
 
385
  "language_model.model.layers.23.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
 
386
  "language_model.model.layers.23.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
 
 
387
  "language_model.model.layers.23.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
 
388
  "language_model.model.layers.23.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
389
  "language_model.model.layers.23.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
390
  "language_model.model.layers.23.input_layernorm.weight": "model-00002-of-00003.safetensors",
 
495
  "language_model.model.layers.27.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
496
  "language_model.model.layers.27.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
497
  "language_model.model.layers.28.cross_attn.k_norm.weight": "model-00002-of-00003.safetensors",
 
498
  "language_model.model.layers.28.cross_attn.k_proj.weight": "model-00002-of-00003.safetensors",
 
 
 
499
  "language_model.model.layers.28.cross_attn.o_proj.weight": "model-00002-of-00003.safetensors",
 
500
  "language_model.model.layers.28.cross_attn.q_norm.weight": "model-00002-of-00003.safetensors",
 
501
  "language_model.model.layers.28.cross_attn.q_proj.weight": "model-00002-of-00003.safetensors",
 
 
502
  "language_model.model.layers.28.cross_attn.v_proj.weight": "model-00002-of-00003.safetensors",
 
503
  "language_model.model.layers.28.cross_attn_attn_gate": "model-00002-of-00003.safetensors",
504
  "language_model.model.layers.28.cross_attn_mlp_gate": "model-00002-of-00003.safetensors",
505
  "language_model.model.layers.28.input_layernorm.weight": "model-00002-of-00003.safetensors",
 
538
  "language_model.model.layers.29.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
539
  "language_model.model.layers.29.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
540
  "language_model.model.layers.3.cross_attn.k_norm.weight": "model-00001-of-00003.safetensors",
 
541
  "language_model.model.layers.3.cross_attn.k_proj.weight": "model-00001-of-00003.safetensors",
 
 
 
542
  "language_model.model.layers.3.cross_attn.o_proj.weight": "model-00001-of-00003.safetensors",
 
543
  "language_model.model.layers.3.cross_attn.q_norm.weight": "model-00001-of-00003.safetensors",
 
544
  "language_model.model.layers.3.cross_attn.q_proj.weight": "model-00001-of-00003.safetensors",
 
 
545
  "language_model.model.layers.3.cross_attn.v_proj.weight": "model-00001-of-00003.safetensors",
 
546
  "language_model.model.layers.3.cross_attn_attn_gate": "model-00001-of-00003.safetensors",
547
  "language_model.model.layers.3.cross_attn_mlp_gate": "model-00001-of-00003.safetensors",
548
  "language_model.model.layers.3.input_layernorm.weight": "model-00001-of-00003.safetensors",
 
580
  "language_model.model.layers.30.self_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
581
  "language_model.model.layers.30.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
582
  "language_model.model.layers.30.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
583
+ "language_model.model.layers.31.input_layernorm.weight": "model-00003-of-00003.safetensors",
584
+ "language_model.model.layers.31.mlp.down_proj.input_scale": "model-00003-of-00003.safetensors",
585
+ "language_model.model.layers.31.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
586
+ "language_model.model.layers.31.mlp.down_proj.weight_scale": "model-00003-of-00003.safetensors",
587
  "language_model.model.layers.31.mlp.gate_proj.input_scale": "model-00002-of-00003.safetensors",
588
  "language_model.model.layers.31.mlp.gate_proj.weight": "model-00002-of-00003.safetensors",
589
  "language_model.model.layers.31.mlp.gate_proj.weight_scale": "model-00002-of-00003.safetensors",
590
  "language_model.model.layers.31.mlp.up_proj.input_scale": "model-00002-of-00003.safetensors",
591
  "language_model.model.layers.31.mlp.up_proj.weight": "model-00002-of-00003.safetensors",
592
  "language_model.model.layers.31.mlp.up_proj.weight_scale": "model-00002-of-00003.safetensors",
593
+ "language_model.model.layers.31.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
594
  "language_model.model.layers.31.self_attn.k_proj.input_scale": "model-00002-of-00003.safetensors",
595
  "language_model.model.layers.31.self_attn.k_proj.weight": "model-00002-of-00003.safetensors",
596
  "language_model.model.layers.31.self_attn.k_proj.weight_scale": "model-00002-of-00003.safetensors",
 
604
  "language_model.model.layers.31.self_attn.v_proj.input_scale": "model-00002-of-00003.safetensors",
605
  "language_model.model.layers.31.self_attn.v_proj.weight": "model-00002-of-00003.safetensors",
606
  "language_model.model.layers.31.self_attn.v_proj.weight_scale": "model-00002-of-00003.safetensors",
607
+ "language_model.model.layers.32.input_layernorm.weight": "model-00003-of-00003.safetensors",
608
+ "language_model.model.layers.32.mlp.down_proj.input_scale": "model-00003-of-00003.safetensors",
609
+ "language_model.model.layers.32.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
610
+ "language_model.model.layers.32.mlp.down_proj.weight_scale": "model-00003-of-00003.safetensors",
611
+ "language_model.model.layers.32.mlp.gate_proj.input_scale": "model-00003-of-00003.safetensors",
612
+ "language_model.model.layers.32.mlp.gate_proj.weight": "model-00003-of-00003.safetensors",
613
+ "language_model.model.layers.32.mlp.gate_proj.weight_scale": "model-00003-of-00003.safetensors",
614
+ "language_model.model.layers.32.mlp.up_proj.input_scale": "model-00003-of-00003.safetensors",
615
+ "language_model.model.layers.32.mlp.up_proj.weight": "model-00003-of-00003.safetensors",
616
+ "language_model.model.layers.32.mlp.up_proj.weight_scale": "model-00003-of-00003.safetensors",
617
+ "language_model.model.layers.32.post_attention_layernorm.weight": "model-00003-of-00003.safetensors",
618
+ "language_model.model.layers.32.self_attn.k_proj.input_scale": "model-00003-of-00003.safetensors",
619
+ "language_model.model.layers.32.self_attn.k_proj.weight": "model-00003-of-00003.safetensors",
620
+ "language_model.model.layers.32.self_attn.k_proj.weight_scale": "model-00003-of-00003.safetensors",
621
+ "language_model.model.layers.32.self_attn.kv_scale": "model-00003-of-00003.safetensors",
622
+ "language_model.model.layers.32.self_attn.o_proj.input_scale": "model-00003-of-00003.safetensors",
623
+ "language_model.model.layers.32.self_attn.o_proj.weight": "model-00003-of-00003.safetensors",
624
+ "language_model.model.layers.32.self_attn.o_proj.weight_scale": "model-00003-of-00003.safetensors",
625
+ "language_model.model.layers.32.self_attn.q_proj.input_scale": "model-00003-of-00003.safetensors",
626
+ "language_model.model.layers.32.self_attn.q_proj.weight": "model-00003-of-00003.safetensors",
627
+ "language_model.model.layers.32.self_attn.q_proj.weight_scale": "model-00003-of-00003.safetensors",
628
+ "language_model.model.layers.32.self_attn.v_proj.input_scale": "model-00003-of-00003.safetensors",
629
+ "language_model.model.layers.32.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
630
+ "language_model.model.layers.32.self_attn.v_proj.weight_scale": "model-00003-of-00003.safetensors",
631
  "language_model.model.layers.33.cross_attn.k_norm.weight": "model-00003-of-00003.safetensors",
 
632
  "language_model.model.layers.33.cross_attn.k_proj.weight": "model-00003-of-00003.safetensors",
 
 
 
633
  "language_model.model.layers.33.cross_attn.o_proj.weight": "model-00003-of-00003.safetensors",
 
634
  "language_model.model.layers.33.cross_attn.q_norm.weight": "model-00003-of-00003.safetensors",
 
635
  "language_model.model.layers.33.cross_attn.q_proj.weight": "model-00003-of-00003.safetensors",
 
 
636
  "language_model.model.layers.33.cross_attn.v_proj.weight": "model-00003-of-00003.safetensors",
637
+ "language_model.model.layers.33.cross_attn_attn_gate": "model-00003-of-00003.safetensors",
638
+ "language_model.model.layers.33.cross_attn_mlp_gate": "model-00003-of-00003.safetensors",
 
639
  "language_model.model.layers.33.input_layernorm.weight": "model-00003-of-00003.safetensors",
640
  "language_model.model.layers.33.mlp.down_proj.input_scale": "model-00003-of-00003.safetensors",
641
  "language_model.model.layers.33.mlp.down_proj.weight": "model-00003-of-00003.safetensors",
 
744
  "language_model.model.layers.37.self_attn.v_proj.weight": "model-00003-of-00003.safetensors",
745
  "language_model.model.layers.37.self_attn.v_proj.weight_scale": "model-00003-of-00003.safetensors",
746
  "language_model.model.layers.38.cross_attn.k_norm.weight": "model-00003-of-00003.safetensors",
 
747
  "language_model.model.layers.38.cross_attn.k_proj.weight": "model-00003-of-00003.safetensors",
 
 
 
748
  "language_model.model.layers.38.cross_attn.o_proj.weight": "model-00003-of-00003.safetensors",
 
749
  "language_model.model.layers.38.cross_attn.q_norm.weight": "model-00003-of-00003.safetensors",
 
750
  "language_model.model.layers.38.cross_attn.q_proj.weight": "model-00003-of-00003.safetensors",
 
 
751
  "language_model.model.layers.38.cross_attn.v_proj.weight": "model-00003-of-00003.safetensors",
 
752
  "language_model.model.layers.38.cross_attn_attn_gate": "model-00003-of-00003.safetensors",
753
  "language_model.model.layers.38.cross_attn_mlp_gate": "model-00003-of-00003.safetensors",
754
  "language_model.model.layers.38.input_layernorm.weight": "model-00003-of-00003.safetensors",
 
883
  "language_model.model.layers.7.self_attn.v_proj.weight": "model-00001-of-00003.safetensors",
884
  "language_model.model.layers.7.self_attn.v_proj.weight_scale": "model-00001-of-00003.safetensors",
885
  "language_model.model.layers.8.cross_attn.k_norm.weight": "model-00001-of-00003.safetensors",
 
886
  "language_model.model.layers.8.cross_attn.k_proj.weight": "model-00001-of-00003.safetensors",
 
 
 
887
  "language_model.model.layers.8.cross_attn.o_proj.weight": "model-00001-of-00003.safetensors",
 
888
  "language_model.model.layers.8.cross_attn.q_norm.weight": "model-00001-of-00003.safetensors",
 
889
  "language_model.model.layers.8.cross_attn.q_proj.weight": "model-00001-of-00003.safetensors",
 
 
890
  "language_model.model.layers.8.cross_attn.v_proj.weight": "model-00001-of-00003.safetensors",
 
891
  "language_model.model.layers.8.cross_attn_attn_gate": "model-00001-of-00003.safetensors",
892
  "language_model.model.layers.8.cross_attn_mlp_gate": "model-00001-of-00003.safetensors",
893
  "language_model.model.layers.8.input_layernorm.weight": "model-00001-of-00003.safetensors",
 
901
  "language_model.model.layers.8.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
902
  "language_model.model.layers.8.mlp.up_proj.weight_scale": "model-00001-of-00003.safetensors",
903
  "language_model.model.layers.8.post_attention_layernorm.weight": "model-00001-of-00003.safetensors",
904
+ "language_model.model.layers.9.input_layernorm.weight": "model-00002-of-00003.safetensors",
905
+ "language_model.model.layers.9.mlp.down_proj.input_scale": "model-00002-of-00003.safetensors",
906
+ "language_model.model.layers.9.mlp.down_proj.weight": "model-00002-of-00003.safetensors",
907
+ "language_model.model.layers.9.mlp.down_proj.weight_scale": "model-00002-of-00003.safetensors",
908
  "language_model.model.layers.9.mlp.gate_proj.input_scale": "model-00001-of-00003.safetensors",
909
  "language_model.model.layers.9.mlp.gate_proj.weight": "model-00001-of-00003.safetensors",
910
  "language_model.model.layers.9.mlp.gate_proj.weight_scale": "model-00001-of-00003.safetensors",
911
  "language_model.model.layers.9.mlp.up_proj.input_scale": "model-00001-of-00003.safetensors",
912
  "language_model.model.layers.9.mlp.up_proj.weight": "model-00001-of-00003.safetensors",
913
  "language_model.model.layers.9.mlp.up_proj.weight_scale": "model-00001-of-00003.safetensors",
914
+ "language_model.model.layers.9.post_attention_layernorm.weight": "model-00002-of-00003.safetensors",
915
  "language_model.model.layers.9.self_attn.k_proj.input_scale": "model-00001-of-00003.safetensors",
916
  "language_model.model.layers.9.self_attn.k_proj.weight": "model-00001-of-00003.safetensors",
917
  "language_model.model.layers.9.self_attn.k_proj.weight_scale": "model-00001-of-00003.safetensors",
preprocessor_config.json CHANGED
@@ -16,6 +16,7 @@
16
  0.27577711
17
  ],
18
  "max_image_tiles": 4,
 
19
  "resample": 2,
20
  "rescale_factor": 0.00392156862745098,
21
  "size": {
 
16
  0.27577711
17
  ],
18
  "max_image_tiles": 4,
19
+ "processor_class": "MllamaProcessor",
20
  "resample": 2,
21
  "rescale_factor": 0.00392156862745098,
22
  "size": {