nintwentydo committed
Commit 9f2abb4 · verified · Parent(s): 0c48b13

Update config.json

Files changed (1): config.json (+141 -11)
config.json CHANGED
@@ -1,13 +1,8 @@
 {
-  "_name_or_path": "mistral-community/pixtral-12b",
+  "_name_or_path": "",
   "architectures": [
     "LlavaForConditionalGeneration"
   ],
-  "ignore_index": -100,
-  "image_seq_length": 1,
-  "image_token_index": 10,
-  "model_type": "llava",
-  "projector_hidden_act": "gelu",
   "quantization_config": {
     "config_groups": {
       "group_0": {
@@ -17,7 +12,7 @@
         "dynamic": true,
         "group_size": null,
         "num_bits": 8,
-        "observer": null,
+        "observer": "memoryless",
         "observer_kwargs": {},
         "strategy": "token",
         "symmetric": true,
@@ -42,7 +37,7 @@
       }
     },
     "format": "float-quantized",
-    "global_compression_ratio": 1.2549863736863405,
+    "global_compression_ratio": 1.1688641443181655,
     "ignore": [
       "vision_tower.transformer.layers.0.feed_forward.gate_proj",
       "vision_tower.transformer.layers.0.feed_forward.up_proj",
@@ -231,31 +226,166 @@
     "quant_method": "compressed-tensors",
     "quantization_status": "compressed"
   },
+  "ignore_index": -100,
+  "image_seq_length": 1,
+  "image_token_index": 10,
+  "model_type": "llava",
+  "projector_hidden_act": "gelu",
   "text_config": {
-    "hidden_size": 5120,
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": 1,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": 2,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
     "head_dim": 128,
+    "hidden_act": "silu",
+    "hidden_size": 5120,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
+    "initializer_range": 0.02,
     "intermediate_size": 14336,
     "is_composition": true,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
     "max_position_embeddings": 1024000,
+    "min_length": 0,
     "model_type": "mistral",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 32,
+    "num_beam_groups": 1,
+    "num_beams": 1,
     "num_hidden_layers": 40,
     "num_key_value_heads": 8,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
     "rms_norm_eps": 1e-05,
     "rope_theta": 1000000000.0,
+    "sep_token_id": null,
     "sliding_window": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": false,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false,
+    "use_cache": true,
     "vocab_size": 131072
   },
   "torch_dtype": "bfloat16",
-  "transformers_version": "4.47.1",
+  "transformers_version": "4.45.1",
   "vision_config": {
+    "_name_or_path": "",
+    "add_cross_attention": false,
+    "architectures": null,
+    "attention_dropout": 0.0,
+    "bad_words_ids": null,
+    "begin_suppress_tokens": null,
+    "bos_token_id": null,
+    "chunk_size_feed_forward": 0,
+    "cross_attention_hidden_size": null,
+    "decoder_start_token_id": null,
+    "diversity_penalty": 0.0,
+    "do_sample": false,
+    "early_stopping": false,
+    "encoder_no_repeat_ngram_size": 0,
+    "eos_token_id": null,
+    "exponential_decay_length_penalty": null,
+    "finetuning_task": null,
+    "forced_bos_token_id": null,
+    "forced_eos_token_id": null,
     "head_dim": 64,
     "hidden_act": "silu",
+    "hidden_size": 1024,
+    "id2label": {
+      "0": "LABEL_0",
+      "1": "LABEL_1"
+    },
     "image_size": 1024,
+    "intermediate_size": 4096,
     "is_composition": true,
+    "is_decoder": false,
+    "is_encoder_decoder": false,
+    "label2id": {
+      "LABEL_0": 0,
+      "LABEL_1": 1
+    },
+    "length_penalty": 1.0,
+    "max_length": 20,
+    "min_length": 0,
     "model_type": "pixtral",
+    "no_repeat_ngram_size": 0,
+    "num_attention_heads": 16,
+    "num_beam_groups": 1,
+    "num_beams": 1,
+    "num_channels": 3,
+    "num_hidden_layers": 24,
+    "num_return_sequences": 1,
+    "output_attentions": false,
+    "output_hidden_states": false,
+    "output_scores": false,
+    "pad_token_id": null,
     "patch_size": 16,
+    "prefix": null,
+    "problem_type": null,
+    "pruned_heads": {},
+    "remove_invalid_values": false,
+    "repetition_penalty": 1.0,
+    "return_dict": true,
+    "return_dict_in_generate": false,
     "rope_theta": 10000.0,
-    "tie_word_embeddings": false
+    "sep_token_id": null,
+    "suppress_tokens": null,
+    "task_specific_params": null,
+    "temperature": 1.0,
+    "tf_legacy_loss": false,
+    "tie_encoder_decoder": false,
+    "tie_word_embeddings": false,
+    "tokenizer_class": null,
+    "top_k": 50,
+    "top_p": 1.0,
+    "torch_dtype": null,
+    "torchscript": false,
+    "typical_p": 1.0,
+    "use_bfloat16": false
   },
   "vision_feature_layer": -1,
   "vision_feature_select_strategy": "full"