nintwentydo committed
Commit effec59 · verified · 1 Parent(s): 1ff7a22

Update config.json

Files changed (1)
  1. config.json +256 −385
config.json CHANGED
@@ -1,392 +1,263 @@
 {
-  "_name_or_path": "",
-  "architectures": [
-    "LlavaForConditionalGeneration"
-  ],
-  "quantization_config": {
-    "config_groups": {
-      "group_0": {
-        "input_activations": {
-          "actorder": null,
-          "block_structure": null,
-          "dynamic": true,
-          "group_size": null,
-          "num_bits": 8,
-          "observer": "memoryless",
-          "observer_kwargs": {},
-          "strategy": "token",
-          "symmetric": true,
-          "type": "float"
         },
-        "output_activations": null,
-        "targets": [
-          "Linear"
         ],
-        "weights": {
-          "actorder": null,
-          "block_structure": null,
-          "dynamic": false,
-          "group_size": null,
-          "num_bits": 8,
-          "observer": "minmax",
-          "observer_kwargs": {},
-          "strategy": "channel",
-          "symmetric": true,
-          "type": "float"
-        }
-      }
-    },
-    "format": "float-quantized",
-    "global_compression_ratio": 1.1688641443181655,
-    "ignore": [
-      "vision_tower.transformer.layers.0.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.0.feed_forward.up_proj",
-      "vision_tower.transformer.layers.0.feed_forward.down_proj",
-      "vision_tower.transformer.layers.0.attention.k_proj",
-      "vision_tower.transformer.layers.0.attention.v_proj",
-      "vision_tower.transformer.layers.0.attention.q_proj",
-      "vision_tower.transformer.layers.0.attention.o_proj",
-      "vision_tower.transformer.layers.1.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.1.feed_forward.up_proj",
-      "vision_tower.transformer.layers.1.feed_forward.down_proj",
-      "vision_tower.transformer.layers.1.attention.k_proj",
-      "vision_tower.transformer.layers.1.attention.v_proj",
-      "vision_tower.transformer.layers.1.attention.q_proj",
-      "vision_tower.transformer.layers.1.attention.o_proj",
-      "vision_tower.transformer.layers.2.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.2.feed_forward.up_proj",
-      "vision_tower.transformer.layers.2.feed_forward.down_proj",
-      "vision_tower.transformer.layers.2.attention.k_proj",
-      "vision_tower.transformer.layers.2.attention.v_proj",
-      "vision_tower.transformer.layers.2.attention.q_proj",
-      "vision_tower.transformer.layers.2.attention.o_proj",
-      "vision_tower.transformer.layers.3.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.3.feed_forward.up_proj",
-      "vision_tower.transformer.layers.3.feed_forward.down_proj",
-      "vision_tower.transformer.layers.3.attention.k_proj",
-      "vision_tower.transformer.layers.3.attention.v_proj",
-      "vision_tower.transformer.layers.3.attention.q_proj",
-      "vision_tower.transformer.layers.3.attention.o_proj",
-      "vision_tower.transformer.layers.4.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.4.feed_forward.up_proj",
-      "vision_tower.transformer.layers.4.feed_forward.down_proj",
-      "vision_tower.transformer.layers.4.attention.k_proj",
-      "vision_tower.transformer.layers.4.attention.v_proj",
-      "vision_tower.transformer.layers.4.attention.q_proj",
-      "vision_tower.transformer.layers.4.attention.o_proj",
-      "vision_tower.transformer.layers.5.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.5.feed_forward.up_proj",
-      "vision_tower.transformer.layers.5.feed_forward.down_proj",
-      "vision_tower.transformer.layers.5.attention.k_proj",
-      "vision_tower.transformer.layers.5.attention.v_proj",
-      "vision_tower.transformer.layers.5.attention.q_proj",
-      "vision_tower.transformer.layers.5.attention.o_proj",
-      "vision_tower.transformer.layers.6.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.6.feed_forward.up_proj",
-      "vision_tower.transformer.layers.6.feed_forward.down_proj",
-      "vision_tower.transformer.layers.6.attention.k_proj",
-      "vision_tower.transformer.layers.6.attention.v_proj",
-      "vision_tower.transformer.layers.6.attention.q_proj",
-      "vision_tower.transformer.layers.6.attention.o_proj",
-      "vision_tower.transformer.layers.7.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.7.feed_forward.up_proj",
-      "vision_tower.transformer.layers.7.feed_forward.down_proj",
-      "vision_tower.transformer.layers.7.attention.k_proj",
-      "vision_tower.transformer.layers.7.attention.v_proj",
-      "vision_tower.transformer.layers.7.attention.q_proj",
-      "vision_tower.transformer.layers.7.attention.o_proj",
-      "vision_tower.transformer.layers.8.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.8.feed_forward.up_proj",
-      "vision_tower.transformer.layers.8.feed_forward.down_proj",
-      "vision_tower.transformer.layers.8.attention.k_proj",
-      "vision_tower.transformer.layers.8.attention.v_proj",
-      "vision_tower.transformer.layers.8.attention.q_proj",
-      "vision_tower.transformer.layers.8.attention.o_proj",
-      "vision_tower.transformer.layers.9.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.9.feed_forward.up_proj",
-      "vision_tower.transformer.layers.9.feed_forward.down_proj",
-      "vision_tower.transformer.layers.9.attention.k_proj",
-      "vision_tower.transformer.layers.9.attention.v_proj",
-      "vision_tower.transformer.layers.9.attention.q_proj",
-      "vision_tower.transformer.layers.9.attention.o_proj",
-      "vision_tower.transformer.layers.10.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.10.feed_forward.up_proj",
-      "vision_tower.transformer.layers.10.feed_forward.down_proj",
-      "vision_tower.transformer.layers.10.attention.k_proj",
-      "vision_tower.transformer.layers.10.attention.v_proj",
-      "vision_tower.transformer.layers.10.attention.q_proj",
-      "vision_tower.transformer.layers.10.attention.o_proj",
-      "vision_tower.transformer.layers.11.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.11.feed_forward.up_proj",
-      "vision_tower.transformer.layers.11.feed_forward.down_proj",
-      "vision_tower.transformer.layers.11.attention.k_proj",
-      "vision_tower.transformer.layers.11.attention.v_proj",
-      "vision_tower.transformer.layers.11.attention.q_proj",
-      "vision_tower.transformer.layers.11.attention.o_proj",
-      "vision_tower.transformer.layers.12.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.12.feed_forward.up_proj",
-      "vision_tower.transformer.layers.12.feed_forward.down_proj",
-      "vision_tower.transformer.layers.12.attention.k_proj",
-      "vision_tower.transformer.layers.12.attention.v_proj",
-      "vision_tower.transformer.layers.12.attention.q_proj",
-      "vision_tower.transformer.layers.12.attention.o_proj",
-      "vision_tower.transformer.layers.13.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.13.feed_forward.up_proj",
-      "vision_tower.transformer.layers.13.feed_forward.down_proj",
-      "vision_tower.transformer.layers.13.attention.k_proj",
-      "vision_tower.transformer.layers.13.attention.v_proj",
-      "vision_tower.transformer.layers.13.attention.q_proj",
-      "vision_tower.transformer.layers.13.attention.o_proj",
-      "vision_tower.transformer.layers.14.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.14.feed_forward.up_proj",
-      "vision_tower.transformer.layers.14.feed_forward.down_proj",
-      "vision_tower.transformer.layers.14.attention.k_proj",
-      "vision_tower.transformer.layers.14.attention.v_proj",
-      "vision_tower.transformer.layers.14.attention.q_proj",
-      "vision_tower.transformer.layers.14.attention.o_proj",
-      "vision_tower.transformer.layers.15.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.15.feed_forward.up_proj",
-      "vision_tower.transformer.layers.15.feed_forward.down_proj",
-      "vision_tower.transformer.layers.15.attention.k_proj",
-      "vision_tower.transformer.layers.15.attention.v_proj",
-      "vision_tower.transformer.layers.15.attention.q_proj",
-      "vision_tower.transformer.layers.15.attention.o_proj",
-      "vision_tower.transformer.layers.16.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.16.feed_forward.up_proj",
-      "vision_tower.transformer.layers.16.feed_forward.down_proj",
-      "vision_tower.transformer.layers.16.attention.k_proj",
-      "vision_tower.transformer.layers.16.attention.v_proj",
-      "vision_tower.transformer.layers.16.attention.q_proj",
-      "vision_tower.transformer.layers.16.attention.o_proj",
-      "vision_tower.transformer.layers.17.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.17.feed_forward.up_proj",
-      "vision_tower.transformer.layers.17.feed_forward.down_proj",
-      "vision_tower.transformer.layers.17.attention.k_proj",
-      "vision_tower.transformer.layers.17.attention.v_proj",
-      "vision_tower.transformer.layers.17.attention.q_proj",
-      "vision_tower.transformer.layers.17.attention.o_proj",
-      "vision_tower.transformer.layers.18.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.18.feed_forward.up_proj",
-      "vision_tower.transformer.layers.18.feed_forward.down_proj",
-      "vision_tower.transformer.layers.18.attention.k_proj",
-      "vision_tower.transformer.layers.18.attention.v_proj",
-      "vision_tower.transformer.layers.18.attention.q_proj",
-      "vision_tower.transformer.layers.18.attention.o_proj",
-      "vision_tower.transformer.layers.19.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.19.feed_forward.up_proj",
-      "vision_tower.transformer.layers.19.feed_forward.down_proj",
-      "vision_tower.transformer.layers.19.attention.k_proj",
-      "vision_tower.transformer.layers.19.attention.v_proj",
-      "vision_tower.transformer.layers.19.attention.q_proj",
-      "vision_tower.transformer.layers.19.attention.o_proj",
-      "vision_tower.transformer.layers.20.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.20.feed_forward.up_proj",
-      "vision_tower.transformer.layers.20.feed_forward.down_proj",
-      "vision_tower.transformer.layers.20.attention.k_proj",
-      "vision_tower.transformer.layers.20.attention.v_proj",
-      "vision_tower.transformer.layers.20.attention.q_proj",
-      "vision_tower.transformer.layers.20.attention.o_proj",
-      "vision_tower.transformer.layers.21.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.21.feed_forward.up_proj",
-      "vision_tower.transformer.layers.21.feed_forward.down_proj",
-      "vision_tower.transformer.layers.21.attention.k_proj",
-      "vision_tower.transformer.layers.21.attention.v_proj",
-      "vision_tower.transformer.layers.21.attention.q_proj",
-      "vision_tower.transformer.layers.21.attention.o_proj",
-      "vision_tower.transformer.layers.22.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.22.feed_forward.up_proj",
-      "vision_tower.transformer.layers.22.feed_forward.down_proj",
-      "vision_tower.transformer.layers.22.attention.k_proj",
-      "vision_tower.transformer.layers.22.attention.v_proj",
-      "vision_tower.transformer.layers.22.attention.q_proj",
-      "vision_tower.transformer.layers.22.attention.o_proj",
-      "vision_tower.transformer.layers.23.feed_forward.gate_proj",
-      "vision_tower.transformer.layers.23.feed_forward.up_proj",
-      "vision_tower.transformer.layers.23.feed_forward.down_proj",
-      "vision_tower.transformer.layers.23.attention.k_proj",
-      "vision_tower.transformer.layers.23.attention.v_proj",
-      "vision_tower.transformer.layers.23.attention.q_proj",
-      "vision_tower.transformer.layers.23.attention.o_proj",
-      "multi_modal_projector.linear_1",
-      "multi_modal_projector.linear_2",
-      "language_model.lm_head"
-    ],
-    "kv_cache_scheme": {
-      "actorder": null,
-      "block_structure": null,
-      "dynamic": false,
-      "group_size": null,
-      "num_bits": 8,
-      "observer": "minmax",
-      "observer_kwargs": {},
-      "strategy": "tensor",
-      "symmetric": true,
-      "type": "float"
-    },
-    "quant_method": "compressed-tensors",
-    "quantization_status": "compressed"
-  },
-  "ignore_index": -100,
-  "image_seq_length": 1,
-  "image_token_index": 10,
-  "model_type": "llava",
-  "projector_hidden_act": "gelu",
-  "text_config": {
-    "_name_or_path": "",
-    "add_cross_attention": false,
-    "architectures": null,
-    "attention_dropout": 0.0,
-    "bad_words_ids": null,
-    "begin_suppress_tokens": null,
-    "bos_token_id": 1,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": 2,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
-    "head_dim": 128,
-    "hidden_act": "silu",
-    "hidden_size": 5120,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
-    },
-    "initializer_range": 0.02,
-    "intermediate_size": 14336,
-    "is_composition": true,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
     },
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "max_position_embeddings": 1024000,
-    "min_length": 0,
-    "model_type": "mistral",
-    "no_repeat_ngram_size": 0,
-    "num_attention_heads": 32,
-    "num_beam_groups": 1,
-    "num_beams": 1,
-    "num_hidden_layers": 40,
-    "num_key_value_heads": 8,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": null,
-    "prefix": null,
-    "problem_type": null,
-    "pruned_heads": {},
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
-    "rms_norm_eps": 1e-05,
-    "rope_theta": 1000000000.0,
-    "sep_token_id": null,
-    "sliding_window": null,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": false,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
-    "torch_dtype": null,
-    "torchscript": false,
-    "typical_p": 1.0,
-    "use_bfloat16": false,
-    "use_cache": true,
-    "vocab_size": 131072
-  },
-  "torch_dtype": "bfloat16",
-  "transformers_version": "4.45.1",
-  "vision_config": {
-    "_name_or_path": "",
-    "add_cross_attention": false,
-    "architectures": null,
-    "attention_dropout": 0.0,
-    "bad_words_ids": null,
-    "begin_suppress_tokens": null,
-    "bos_token_id": null,
-    "chunk_size_feed_forward": 0,
-    "cross_attention_hidden_size": null,
-    "decoder_start_token_id": null,
-    "diversity_penalty": 0.0,
-    "do_sample": false,
-    "early_stopping": false,
-    "encoder_no_repeat_ngram_size": 0,
-    "eos_token_id": null,
-    "exponential_decay_length_penalty": null,
-    "finetuning_task": null,
-    "forced_bos_token_id": null,
-    "forced_eos_token_id": null,
-    "head_dim": 64,
-    "hidden_act": "silu",
-    "hidden_size": 1024,
-    "id2label": {
-      "0": "LABEL_0",
-      "1": "LABEL_1"
     },
-    "image_size": 1024,
-    "intermediate_size": 4096,
-    "is_composition": true,
-    "is_decoder": false,
-    "is_encoder_decoder": false,
-    "label2id": {
-      "LABEL_0": 0,
-      "LABEL_1": 1
     },
-    "length_penalty": 1.0,
-    "max_length": 20,
-    "min_length": 0,
-    "model_type": "pixtral",
-    "no_repeat_ngram_size": 0,
-    "num_attention_heads": 16,
-    "num_beam_groups": 1,
-    "num_beams": 1,
-    "num_channels": 3,
-    "num_hidden_layers": 24,
-    "num_return_sequences": 1,
-    "output_attentions": false,
-    "output_hidden_states": false,
-    "output_scores": false,
-    "pad_token_id": null,
-    "patch_size": 16,
-    "prefix": null,
-    "problem_type": null,
-    "pruned_heads": {},
-    "remove_invalid_values": false,
-    "repetition_penalty": 1.0,
-    "return_dict": true,
-    "return_dict_in_generate": false,
-    "rope_theta": 10000.0,
-    "sep_token_id": null,
-    "suppress_tokens": null,
-    "task_specific_params": null,
-    "temperature": 1.0,
-    "tf_legacy_loss": false,
-    "tie_encoder_decoder": false,
-    "tie_word_embeddings": false,
-    "tokenizer_class": null,
-    "top_k": 50,
-    "top_p": 1.0,
-    "torch_dtype": null,
-    "torchscript": false,
-    "typical_p": 1.0,
-    "use_bfloat16": false
-  },
-  "vision_feature_layer": -1,
-  "vision_feature_select_strategy": "full"
 }
 
 {
+  "_commit_hash": null,
+  "_name_or_path": "../pixtral",
+  "architectures": [
+    "LlavaForConditionalGeneration"
+  ],
+  "quantization_config": {
+    "config_groups": {
+      "group_0": {
+        "input_activations": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": true,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "memoryless",
+          "observer_kwargs": {},
+          "strategy": "token",
+          "symmetric": true,
+          "type": "float"
+        },
+        "output_activations": null,
+        "targets": [
+          "Linear"
+        ],
+        "weights": {
+          "actorder": null,
+          "block_structure": null,
+          "dynamic": false,
+          "group_size": null,
+          "num_bits": 8,
+          "observer": "minmax",
+          "observer_kwargs": {},
+          "strategy": "channel",
+          "symmetric": true,
+          "type": "float"
+        }
+      }
     },
+    "format": "float-quantized",
+    "global_compression_ratio": 1.1688641443181655,
+    "ignore": [
+      "vision_tower.transformer.layers.0.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.0.feed_forward.up_proj",
+      "vision_tower.transformer.layers.0.feed_forward.down_proj",
+      "vision_tower.transformer.layers.0.attention.k_proj",
+      "vision_tower.transformer.layers.0.attention.v_proj",
+      "vision_tower.transformer.layers.0.attention.q_proj",
+      "vision_tower.transformer.layers.0.attention.o_proj",
+      "vision_tower.transformer.layers.1.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.1.feed_forward.up_proj",
+      "vision_tower.transformer.layers.1.feed_forward.down_proj",
+      "vision_tower.transformer.layers.1.attention.k_proj",
+      "vision_tower.transformer.layers.1.attention.v_proj",
+      "vision_tower.transformer.layers.1.attention.q_proj",
+      "vision_tower.transformer.layers.1.attention.o_proj",
+      "vision_tower.transformer.layers.2.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.2.feed_forward.up_proj",
+      "vision_tower.transformer.layers.2.feed_forward.down_proj",
+      "vision_tower.transformer.layers.2.attention.k_proj",
+      "vision_tower.transformer.layers.2.attention.v_proj",
+      "vision_tower.transformer.layers.2.attention.q_proj",
+      "vision_tower.transformer.layers.2.attention.o_proj",
+      "vision_tower.transformer.layers.3.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.3.feed_forward.up_proj",
+      "vision_tower.transformer.layers.3.feed_forward.down_proj",
+      "vision_tower.transformer.layers.3.attention.k_proj",
+      "vision_tower.transformer.layers.3.attention.v_proj",
+      "vision_tower.transformer.layers.3.attention.q_proj",
+      "vision_tower.transformer.layers.3.attention.o_proj",
+      "vision_tower.transformer.layers.4.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.4.feed_forward.up_proj",
+      "vision_tower.transformer.layers.4.feed_forward.down_proj",
+      "vision_tower.transformer.layers.4.attention.k_proj",
+      "vision_tower.transformer.layers.4.attention.v_proj",
+      "vision_tower.transformer.layers.4.attention.q_proj",
+      "vision_tower.transformer.layers.4.attention.o_proj",
+      "vision_tower.transformer.layers.5.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.5.feed_forward.up_proj",
+      "vision_tower.transformer.layers.5.feed_forward.down_proj",
+      "vision_tower.transformer.layers.5.attention.k_proj",
+      "vision_tower.transformer.layers.5.attention.v_proj",
+      "vision_tower.transformer.layers.5.attention.q_proj",
+      "vision_tower.transformer.layers.5.attention.o_proj",
+      "vision_tower.transformer.layers.6.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.6.feed_forward.up_proj",
+      "vision_tower.transformer.layers.6.feed_forward.down_proj",
+      "vision_tower.transformer.layers.6.attention.k_proj",
+      "vision_tower.transformer.layers.6.attention.v_proj",
+      "vision_tower.transformer.layers.6.attention.q_proj",
+      "vision_tower.transformer.layers.6.attention.o_proj",
+      "vision_tower.transformer.layers.7.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.7.feed_forward.up_proj",
+      "vision_tower.transformer.layers.7.feed_forward.down_proj",
+      "vision_tower.transformer.layers.7.attention.k_proj",
+      "vision_tower.transformer.layers.7.attention.v_proj",
+      "vision_tower.transformer.layers.7.attention.q_proj",
+      "vision_tower.transformer.layers.7.attention.o_proj",
+      "vision_tower.transformer.layers.8.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.8.feed_forward.up_proj",
+      "vision_tower.transformer.layers.8.feed_forward.down_proj",
+      "vision_tower.transformer.layers.8.attention.k_proj",
+      "vision_tower.transformer.layers.8.attention.v_proj",
+      "vision_tower.transformer.layers.8.attention.q_proj",
+      "vision_tower.transformer.layers.8.attention.o_proj",
+      "vision_tower.transformer.layers.9.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.9.feed_forward.up_proj",
+      "vision_tower.transformer.layers.9.feed_forward.down_proj",
+      "vision_tower.transformer.layers.9.attention.k_proj",
+      "vision_tower.transformer.layers.9.attention.v_proj",
+      "vision_tower.transformer.layers.9.attention.q_proj",
+      "vision_tower.transformer.layers.9.attention.o_proj",
+      "vision_tower.transformer.layers.10.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.10.feed_forward.up_proj",
+      "vision_tower.transformer.layers.10.feed_forward.down_proj",
+      "vision_tower.transformer.layers.10.attention.k_proj",
+      "vision_tower.transformer.layers.10.attention.v_proj",
+      "vision_tower.transformer.layers.10.attention.q_proj",
+      "vision_tower.transformer.layers.10.attention.o_proj",
+      "vision_tower.transformer.layers.11.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.11.feed_forward.up_proj",
+      "vision_tower.transformer.layers.11.feed_forward.down_proj",
+      "vision_tower.transformer.layers.11.attention.k_proj",
+      "vision_tower.transformer.layers.11.attention.v_proj",
+      "vision_tower.transformer.layers.11.attention.q_proj",
+      "vision_tower.transformer.layers.11.attention.o_proj",
+      "vision_tower.transformer.layers.12.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.12.feed_forward.up_proj",
+      "vision_tower.transformer.layers.12.feed_forward.down_proj",
+      "vision_tower.transformer.layers.12.attention.k_proj",
+      "vision_tower.transformer.layers.12.attention.v_proj",
+      "vision_tower.transformer.layers.12.attention.q_proj",
+      "vision_tower.transformer.layers.12.attention.o_proj",
+      "vision_tower.transformer.layers.13.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.13.feed_forward.up_proj",
+      "vision_tower.transformer.layers.13.feed_forward.down_proj",
+      "vision_tower.transformer.layers.13.attention.k_proj",
+      "vision_tower.transformer.layers.13.attention.v_proj",
+      "vision_tower.transformer.layers.13.attention.q_proj",
+      "vision_tower.transformer.layers.13.attention.o_proj",
+      "vision_tower.transformer.layers.14.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.14.feed_forward.up_proj",
+      "vision_tower.transformer.layers.14.feed_forward.down_proj",
+      "vision_tower.transformer.layers.14.attention.k_proj",
+      "vision_tower.transformer.layers.14.attention.v_proj",
+      "vision_tower.transformer.layers.14.attention.q_proj",
+      "vision_tower.transformer.layers.14.attention.o_proj",
+      "vision_tower.transformer.layers.15.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.15.feed_forward.up_proj",
+      "vision_tower.transformer.layers.15.feed_forward.down_proj",
+      "vision_tower.transformer.layers.15.attention.k_proj",
+      "vision_tower.transformer.layers.15.attention.v_proj",
+      "vision_tower.transformer.layers.15.attention.q_proj",
+      "vision_tower.transformer.layers.15.attention.o_proj",
+      "vision_tower.transformer.layers.16.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.16.feed_forward.up_proj",
+      "vision_tower.transformer.layers.16.feed_forward.down_proj",
+      "vision_tower.transformer.layers.16.attention.k_proj",
+      "vision_tower.transformer.layers.16.attention.v_proj",
+      "vision_tower.transformer.layers.16.attention.q_proj",
+      "vision_tower.transformer.layers.16.attention.o_proj",
+      "vision_tower.transformer.layers.17.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.17.feed_forward.up_proj",
+      "vision_tower.transformer.layers.17.feed_forward.down_proj",
+      "vision_tower.transformer.layers.17.attention.k_proj",
+      "vision_tower.transformer.layers.17.attention.v_proj",
+      "vision_tower.transformer.layers.17.attention.q_proj",
+      "vision_tower.transformer.layers.17.attention.o_proj",
+      "vision_tower.transformer.layers.18.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.18.feed_forward.up_proj",
+      "vision_tower.transformer.layers.18.feed_forward.down_proj",
+      "vision_tower.transformer.layers.18.attention.k_proj",
+      "vision_tower.transformer.layers.18.attention.v_proj",
+      "vision_tower.transformer.layers.18.attention.q_proj",
+      "vision_tower.transformer.layers.18.attention.o_proj",
+      "vision_tower.transformer.layers.19.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.19.feed_forward.up_proj",
+      "vision_tower.transformer.layers.19.feed_forward.down_proj",
+      "vision_tower.transformer.layers.19.attention.k_proj",
+      "vision_tower.transformer.layers.19.attention.v_proj",
+      "vision_tower.transformer.layers.19.attention.q_proj",
+      "vision_tower.transformer.layers.19.attention.o_proj",
+      "vision_tower.transformer.layers.20.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.20.feed_forward.up_proj",
+      "vision_tower.transformer.layers.20.feed_forward.down_proj",
+      "vision_tower.transformer.layers.20.attention.k_proj",
+      "vision_tower.transformer.layers.20.attention.v_proj",
+      "vision_tower.transformer.layers.20.attention.q_proj",
+      "vision_tower.transformer.layers.20.attention.o_proj",
+      "vision_tower.transformer.layers.21.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.21.feed_forward.up_proj",
+      "vision_tower.transformer.layers.21.feed_forward.down_proj",
+      "vision_tower.transformer.layers.21.attention.k_proj",
+      "vision_tower.transformer.layers.21.attention.v_proj",
+      "vision_tower.transformer.layers.21.attention.q_proj",
+      "vision_tower.transformer.layers.21.attention.o_proj",
+      "vision_tower.transformer.layers.22.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.22.feed_forward.up_proj",
+      "vision_tower.transformer.layers.22.feed_forward.down_proj",
+      "vision_tower.transformer.layers.22.attention.k_proj",
+      "vision_tower.transformer.layers.22.attention.v_proj",
+      "vision_tower.transformer.layers.22.attention.q_proj",
+      "vision_tower.transformer.layers.22.attention.o_proj",
+      "vision_tower.transformer.layers.23.feed_forward.gate_proj",
+      "vision_tower.transformer.layers.23.feed_forward.up_proj",
+      "vision_tower.transformer.layers.23.feed_forward.down_proj",
+      "vision_tower.transformer.layers.23.attention.k_proj",
+      "vision_tower.transformer.layers.23.attention.v_proj",
+      "vision_tower.transformer.layers.23.attention.q_proj",
+      "vision_tower.transformer.layers.23.attention.o_proj",
+      "multi_modal_projector.linear_1",
+      "multi_modal_projector.linear_2",
+      "language_model.lm_head"
     ],
+    "kv_cache_scheme": {
+      "actorder": null,
+      "block_structure": null,
+      "dynamic": false,
+      "group_size": null,
+      "num_bits": 8,
+      "observer": "minmax",
+      "observer_kwargs": {},
+      "strategy": "tensor",
+      "symmetric": true,
+      "type": "float"
+    },
+    "quant_method": "compressed-tensors",
+    "quantization_status": "compressed"
   },
+  "ignore_index": -100,
+  "image_seq_length": 1,
+  "image_token_index": 10,
+  "model_type": "llava",
+  "projector_hidden_act": "gelu",
+  "text_config": {
+    "hidden_size": 5120,
+    "head_dim": 128,
+    "intermediate_size": 14336,
+    "is_composition": true,
+    "max_position_embeddings": 1024000,
+    "model_type": "mistral",
+    "num_hidden_layers": 40,
+    "num_key_value_heads": 8,
+    "rms_norm_eps": 0.00001,
+    "rope_theta": 1000000000,
+    "sliding_window": null,
+    "vocab_size": 131072
   },
+  "torch_dtype": "bfloat16",
+  "transformers_version": null,
+  "vision_config": {
+    "head_dim": 64,
+    "hidden_act": "silu",
+    "image_size": 1024,
+    "is_composition": true,
+    "model_type": "pixtral",
+    "patch_size": 16,
+    "rope_theta": 10000,
+    "tie_word_embeddings": false
   },
+  "vision_feature_layer": -1,
+  "vision_feature_select_strategy": "full"
 }
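
The `quantization_config` above describes a compressed-tensors FP8 scheme: dynamic per-token FP8 input activations, static per-channel FP8 weights on `Linear` modules, a per-tensor FP8 KV-cache scheme, and every `vision_tower`, `multi_modal_projector`, and `lm_head` layer left unquantized via `ignore`. As a minimal sketch (not the author's actual script), a config of this shape is what llm-compressor's `FP8_DYNAMIC` preset typically emits; the source path, regex ignore patterns, and output directory below are assumptions for illustration, and the KV-cache part of the config is omitted because its static `minmax` scales would additionally need calibration data:

```python
# Hypothetical reproduction sketch: quantize a bf16 Pixtral/LLaVA checkpoint
# to FP8, skipping the same modules listed in the "ignore" array above.
from transformers import LlavaForConditionalGeneration
from llmcompressor.modifiers.quantization import QuantizationModifier
from llmcompressor.transformers import oneshot  # import path is an assumption (varies by llm-compressor version)

model = LlavaForConditionalGeneration.from_pretrained(
    "path/to/pixtral",  # placeholder; the config only records "../pixtral"
    torch_dtype="auto",
)

recipe = QuantizationModifier(
    targets="Linear",      # -> "targets": ["Linear"]
    scheme="FP8_DYNAMIC",  # -> per-channel FP8 weights + dynamic per-token FP8 activations
    ignore=[               # -> expands to the explicit module list in "ignore"
        "re:vision_tower.*",
        "re:multi_modal_projector.*",
        "re:.*lm_head",
    ],
)

oneshot(model=model, recipe=recipe)  # data-free; FP8_DYNAMIC needs no calibration set
model.save_pretrained("pixtral-fp8-dynamic", save_compressed=True)
```

A checkpoint carrying this config should load in any engine that understands `"quant_method": "compressed-tensors"`, for example vLLM, which detects the scheme from `config.json`. The flags below are likewise assumptions; `max_model_len` is capped far below the 1024000-token `max_position_embeddings` to keep KV-cache memory reasonable:

```python
from vllm import LLM, SamplingParams

llm = LLM(model="pixtral-fp8-dynamic", max_model_len=8192)
out = llm.generate(
    ["Summarize FP8 weight quantization in one sentence."],
    SamplingParams(temperature=0.0, max_tokens=64),
)
print(out[0].outputs[0].text)
```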