prince-canuma committed on
Commit
5c04b3a
·
verified ·
1 Parent(s): a342e38

Upload folder using huggingface_hub

Browse files
Files changed (2) hide show
  1. README.md +1 -1
  2. config.json +12 -70
README.md CHANGED
@@ -16,7 +16,7 @@ library_name: transformers
16
  ---
17
 
18
  # mlx-community/Molmo-7B-D-0924-4bit
19
- This model was converted to MLX format from [`allenai/Molmo-7B-D-0924`]() using mlx-vlm version **0.1.6**.
20
  Refer to the [original model card](https://huggingface.co/allenai/Molmo-7B-D-0924) for more details on the model.
21
  ## Use with mlx
22
 
 
16
  ---
17
 
18
  # mlx-community/Molmo-7B-D-0924-4bit
19
+ This model was converted to MLX format from [`allenai/Molmo-7B-D-0924`]() using mlx-vlm version **0.1.7**.
20
  Refer to the [original model card](https://huggingface.co/allenai/Molmo-7B-D-0924) for more details on the model.
21
  ## Use with mlx
22
 
config.json CHANGED
@@ -19,40 +19,40 @@
19
  "diversity_penalty": 0.0,
20
  "do_sample": false,
21
  "early_stopping": false,
22
- "embedding_size": 50304,
23
  "encoder_no_repeat_ngram_size": 0,
24
  "eos_token_id": null,
25
  "exponential_decay_length_penalty": null,
26
  "finetuning_task": null,
27
  "forced_bos_token_id": null,
28
  "forced_eos_token_id": null,
29
- "hidden_size": 4096,
30
  "id2label": {
31
  "0": "LABEL_0",
32
  "1": "LABEL_1"
33
  },
34
  "initializer_range": 0.02,
35
- "intermediate_size": 11008,
36
  "is_decoder": false,
37
  "is_encoder_decoder": false,
38
  "label2id": {
39
  "LABEL_0": 0,
40
  "LABEL_1": 1
41
  },
42
- "layer_norm_eps": 1e-05,
43
  "layer_norm_type": "rms",
44
  "length_penalty": 1.0,
45
  "max_length": 20,
46
- "max_position_embeddings": 2048,
47
  "min_length": 0,
48
  "model_type": "molmo",
49
  "no_repeat_ngram_size": 0,
50
  "norm_after": false,
51
- "num_attention_heads": 32,
52
  "num_beam_groups": 1,
53
  "num_beams": 1,
54
- "num_hidden_layers": 32,
55
- "num_key_value_heads": null,
56
  "num_return_sequences": 1,
57
  "output_attentions": false,
58
  "output_hidden_states": false,
@@ -61,7 +61,7 @@
61
  "prefix": null,
62
  "problem_type": null,
63
  "pruned_heads": {},
64
- "qkv_bias": false,
65
  "quantization": {
66
  "group_size": 64,
67
  "bits": 4
@@ -70,44 +70,18 @@
70
  "repetition_penalty": 1.0,
71
  "return_dict": true,
72
  "return_dict_in_generate": false,
73
- "rope_theta": 10000.0,
74
  "sep_token_id": null,
75
  "suppress_tokens": null,
76
  "task_specific_params": null,
77
  "temperature": 1.0,
78
- "text_config": {
79
- "model_type": "molmo",
80
- "max_position_embeddings": 4096,
81
- "d_model": 3584,
82
- "n_heads": 28,
83
- "n_kv_heads": 4,
84
- "n_layers": 28,
85
- "mlp_ratio": 4,
86
- "max_sequence_length": 1024,
87
- "act_output_multiplier": 0.5,
88
- "mlp_hidden_size": 37888,
89
- "vocab_size": 152064,
90
- "embedding_size": 152064,
91
- "additional_vocab_size": 128,
92
- "attention_dropout": 0.1,
93
- "residual_dropout": 0.1,
94
- "embedding_dropout": 0.1,
95
- "layer_norm_eps": 1e-05,
96
- "initializer_range": 0.02,
97
- "pad_token_id": -1,
98
- "rope": true,
99
- "rope_theta": 1000000.0,
100
- "weight_tying": false,
101
- "rope_full_precision": true,
102
- "rope_impl": "interleave"
103
- },
104
  "tf_legacy_loss": false,
105
  "tie_encoder_decoder": false,
106
- "tie_word_embeddings": true,
107
  "tokenizer_class": null,
108
  "top_k": 50,
109
  "top_p": 1.0,
110
- "torch_dtype": null,
111
  "torchscript": false,
112
  "transformers_version": "4.47.1",
113
  "typical_p": 1.0,
@@ -115,38 +89,6 @@
115
  "use_cache": true,
116
  "use_position_ids": true,
117
  "vision_config": {
118
- "model_type": "molmo",
119
- "num_channels": 3,
120
- "image_default_input_size": [
121
- 336,
122
- 336
123
- ],
124
- "image_patch_size": 14,
125
- "image_pos_patch_size": 14,
126
- "hidden_size": 18944,
127
- "image_emb_dim": 1024,
128
- "image_num_heads": 16,
129
- "image_num_key_value_heads": 16,
130
- "image_num_layers": 23,
131
- "image_head_dim": 64,
132
- "image_mlp_dim": 4096,
133
- "image_mlp_activations": "gelu",
134
- "image_dropout_rate": 0.0,
135
- "image_num_pos": 577,
136
- "image_norm_eps": 1e-05,
137
- "attention_dropout": 0.0,
138
- "residual_dropout": 0.0,
139
- "initializer_range": 0.02,
140
- "d_model": 3584,
141
- "image_pooling_h": 2,
142
- "image_pooling_w": 2,
143
- "vit_layers": [
144
- -2,
145
- -9
146
- ],
147
- "image_pooling_2d": "attention-meanq",
148
- "image_padding_embed": "pad_and_partial_pad",
149
- "intermediate_size": 588,
150
  "skip_vision_non_divisible": true
151
  },
152
  "vocab_size": 152064,
 
19
  "diversity_penalty": 0.0,
20
  "do_sample": false,
21
  "early_stopping": false,
22
+ "embedding_size": 152064,
23
  "encoder_no_repeat_ngram_size": 0,
24
  "eos_token_id": null,
25
  "exponential_decay_length_penalty": null,
26
  "finetuning_task": null,
27
  "forced_bos_token_id": null,
28
  "forced_eos_token_id": null,
29
+ "hidden_size": 3584,
30
  "id2label": {
31
  "0": "LABEL_0",
32
  "1": "LABEL_1"
33
  },
34
  "initializer_range": 0.02,
35
+ "intermediate_size": 37888,
36
  "is_decoder": false,
37
  "is_encoder_decoder": false,
38
  "label2id": {
39
  "LABEL_0": 0,
40
  "LABEL_1": 1
41
  },
42
+ "layer_norm_eps": 1e-06,
43
  "layer_norm_type": "rms",
44
  "length_penalty": 1.0,
45
  "max_length": 20,
46
+ "max_position_embeddings": 4096,
47
  "min_length": 0,
48
  "model_type": "molmo",
49
  "no_repeat_ngram_size": 0,
50
  "norm_after": false,
51
+ "num_attention_heads": 28,
52
  "num_beam_groups": 1,
53
  "num_beams": 1,
54
+ "num_hidden_layers": 28,
55
+ "num_key_value_heads": 4,
56
  "num_return_sequences": 1,
57
  "output_attentions": false,
58
  "output_hidden_states": false,
 
61
  "prefix": null,
62
  "problem_type": null,
63
  "pruned_heads": {},
64
+ "qkv_bias": true,
65
  "quantization": {
66
  "group_size": 64,
67
  "bits": 4
 
70
  "repetition_penalty": 1.0,
71
  "return_dict": true,
72
  "return_dict_in_generate": false,
73
+ "rope_theta": 1000000.0,
74
  "sep_token_id": null,
75
  "suppress_tokens": null,
76
  "task_specific_params": null,
77
  "temperature": 1.0,
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
78
  "tf_legacy_loss": false,
79
  "tie_encoder_decoder": false,
80
+ "tie_word_embeddings": false,
81
  "tokenizer_class": null,
82
  "top_k": 50,
83
  "top_p": 1.0,
84
+ "torch_dtype": "float32",
85
  "torchscript": false,
86
  "transformers_version": "4.47.1",
87
  "typical_p": 1.0,
 
89
  "use_cache": true,
90
  "use_position_ids": true,
91
  "vision_config": {
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
92
  "skip_vision_non_divisible": true
93
  },
94
  "vocab_size": 152064,