Update config.json
nintwentydo committed • Commit 1d3ee8e • 1 Parent(s): effec59

config.json CHANGED (+18 -13)
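This revision can be pinned by its commit hash when fetching from the Hub. A minimal sketch using huggingface_hub; the repo_id below is a hypothetical placeholder, since the page does not show the full "<user>/<model>" repository name:

import json

from huggingface_hub import hf_hub_download

# Download config.json pinned to this exact commit (1d3ee8e).
# NOTE: the repo_id is a hypothetical placeholder -- substitute the
# actual repository this commit belongs to.
path = hf_hub_download(
    repo_id="nintwentydo/pixtral-12b",  # hypothetical repo id
    filename="config.json",
    revision="1d3ee8e",
)

with open(path) as f:
    cfg = json.load(f)

print(cfg["architectures"])  # ["LlavaForConditionalGeneration"]

The diff follows.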
@@ -1,9 +1,14 @@
 {
-  "…
-  "_name_or_path": "../pixtral",
+  "_name_or_path": "mgoin/pixtral-12b",
   "architectures": [
     "LlavaForConditionalGeneration"
   ],
+  "ignore_index": -100,
+  "image_seq_length": 1,
+  "image_token_index": 10,
+  "model_type": "llava",
+  "projector_hidden_act": "gelu",
+  "initializer_range": 0.02,
   "quantization_config": {
     "config_groups": {
       "group_0": {
@@ -227,35 +232,35 @@
       "quant_method": "compressed-tensors",
       "quantization_status": "compressed"
     },
-  "ignore_index": -100,
-  "image_seq_length": 1,
-  "image_token_index": 10,
-  "model_type": "llava",
-  "projector_hidden_act": "gelu",
   "text_config": {
+    "_attn_implementation_autoset": true,
     "hidden_size": 5120,
-    "head_dim": 128,
     "intermediate_size": 14336,
-    "…
+    "head_dim": 128,
     "max_position_embeddings": 1024000,
+    "is_composition": true,
     "model_type": "mistral",
     "num_hidden_layers": 40,
+    "num_attention_heads": 32,
     "num_key_value_heads": 8,
-    "rms_norm_eps": …
-    "rope_theta": 1000000000,
+    "rms_norm_eps": 1e-05,
+    "rope_theta": 1000000000.0,
     "sliding_window": null,
     "vocab_size": 131072
   },
   "torch_dtype": "bfloat16",
-  "transformers_version": …
+  "transformers_version": "4.47.1",
   "vision_config": {
     "head_dim": 64,
     "hidden_act": "silu",
     "image_size": 1024,
+    "intermediate_size": 4096,
     "is_composition": true,
     "model_type": "pixtral",
     "patch_size": 16,
-    "…
+    "num_attention_heads": 16,
+    "num_hidden_layers": 24,
+    "rope_theta": 10000.0,
     "tie_word_embeddings": false
   },
   "vision_feature_layer": -1,
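Taken together, the commit points _name_or_path at mgoin/pixtral-12b, moves the top-level llava keys (ignore_index, image_seq_length, image_token_index, model_type, projector_hidden_act) up in the file, pins transformers_version to 4.47.1, and spells out fields that loaders would otherwise derive or default (the text tower's head_dim, num_attention_heads, rms_norm_eps, rope_theta, and the vision tower's intermediate_size, num_attention_heads, num_hidden_layers, rope_theta). The explicit text head_dim matters because the usual hidden_size / num_attention_heads rule would give 5120 / 32 = 160, not 128. A stdlib-only sanity check over the updated file (a sketch, assuming a local checkout at this commit):

import json

# Quick consistency pass over the updated config.json.
with open("config.json") as f:
    cfg = json.load(f)

text = cfg["text_config"]
vision = cfg["vision_config"]

# head_dim is explicit because it cannot be derived from
# hidden_size / num_attention_heads (5120 / 32 = 160, not 128).
assert text["head_dim"] == 128
assert text["hidden_size"] // text["num_attention_heads"] == 160

# Vision tower geometry: 1024px images with 16px patches -> a 64x64 grid.
assert vision["image_size"] // vision["patch_size"] == 64

# Values pinned by this commit.
assert cfg["transformers_version"] == "4.47.1"
assert text["rope_theta"] == 1_000_000_000.0
assert vision["rope_theta"] == 10_000.0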