n1ck-guo commited on
Commit
e04448b
1 Parent(s): c3c6260

Signed-off-by: n1ck-guo <[email protected]>

config.json ADDED
@@ -0,0 +1,174 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "_name_or_path": "/data6/models/deepseek-vl2",
3
+ "architectures": [
4
+ "DeepseekVLV2ForCausalLM"
5
+ ],
6
+ "candidate_resolutions": [
7
+ [
8
+ 384,
9
+ 384
10
+ ],
11
+ [
12
+ 384,
13
+ 768
14
+ ],
15
+ [
16
+ 768,
17
+ 384
18
+ ],
19
+ [
20
+ 384,
21
+ 1152
22
+ ],
23
+ [
24
+ 1152,
25
+ 384
26
+ ],
27
+ [
28
+ 384,
29
+ 1536
30
+ ],
31
+ [
32
+ 1536,
33
+ 384
34
+ ],
35
+ [
36
+ 768,
37
+ 768
38
+ ],
39
+ [
40
+ 384,
41
+ 1920
42
+ ],
43
+ [
44
+ 1920,
45
+ 384
46
+ ],
47
+ [
48
+ 384,
49
+ 2304
50
+ ],
51
+ [
52
+ 2304,
53
+ 384
54
+ ],
55
+ [
56
+ 768,
57
+ 1152
58
+ ],
59
+ [
60
+ 1152,
61
+ 768
62
+ ],
63
+ [
64
+ 384,
65
+ 2688
66
+ ],
67
+ [
68
+ 2688,
69
+ 384
70
+ ],
71
+ [
72
+ 384,
73
+ 3072
74
+ ],
75
+ [
76
+ 3072,
77
+ 384
78
+ ],
79
+ [
80
+ 768,
81
+ 1536
82
+ ],
83
+ [
84
+ 1536,
85
+ 768
86
+ ],
87
+ [
88
+ 384,
89
+ 3456
90
+ ],
91
+ [
92
+ 3456,
93
+ 384
94
+ ],
95
+ [
96
+ 1152,
97
+ 1152
98
+ ]
99
+ ],
100
+ "global_view_pos": "head",
101
+ "language_config": {
102
+ "_attn_implementation_autoset": true,
103
+ "architectures": [
104
+ "DeepseekV2ForCausalLM"
105
+ ],
106
+ "auto_map": {
107
+ "AutoConfig": "configuration_deepseek.DeepseekV2Config",
108
+ "AutoModel": "modeling_deepseek.DeepseekV2Model",
109
+ "AutoModelForCausalLM": "modeling_deepseek.DeepseekV2ForCausalLM"
110
+ },
111
+ "bos_token_id": 0,
112
+ "eos_token_id": 1,
113
+ "first_k_dense_replace": 1,
114
+ "hidden_size": 2560,
115
+ "intermediate_size": 12288,
116
+ "lm_head": false,
117
+ "max_position_embeddings": 4096,
118
+ "model_type": "deepseek_v2",
119
+ "moe_intermediate_size": 1536,
120
+ "n_group": 1,
121
+ "n_routed_experts": 72,
122
+ "n_shared_experts": 2,
123
+ "norm_topk_prob": true,
124
+ "num_experts_per_tok": 6,
125
+ "q_lora_rank": null,
126
+ "rm_head": false,
127
+ "routed_scaling_factor": 2.0,
128
+ "scoring_func": "sigmoid",
129
+ "topk_group": 1,
130
+ "topk_method": "noaux_tc",
131
+ "torch_dtype": "bfloat16",
132
+ "vocab_size": 129280
133
+ },
134
+ "model_type": "deepseek_vl_v2",
135
+ "projector_config": {
136
+ "model_type": "mlp_projector",
137
+ "n_embed": 2560
138
+ },
139
+ "quantization_config": {
140
+ "amp": true,
141
+ "autoround_version": "0.4.3",
142
+ "backend": "auto_round:gptq:exllamav2",
143
+ "batch_size": 8,
144
+ "bits": 4,
145
+ "data_type": "int",
146
+ "dataset": "NeelNanda/pile-10k",
147
+ "enable_minmax_tuning": true,
148
+ "enable_norm_bias_tuning": false,
149
+ "enable_quanted_input": true,
150
+ "gradient_accumulate_steps": 1,
151
+ "group_size": 128,
152
+ "iters": 1000,
153
+ "low_gpu_mem_usage": true,
154
+ "lr": 0.001,
155
+ "minmax_lr": 0.001,
156
+ "nsamples": 512,
157
+ "quant_method": "intel/auto-round",
158
+ "scale_dtype": "torch.float16",
159
+ "seqlen": 2048,
160
+ "sym": true,
161
+ "to_quant_block_names": "language.model.layers"
162
+ },
163
+ "tile_tag": "2D",
164
+ "torch_dtype": "bfloat16",
165
+ "transformers_version": "4.47.1",
166
+ "vision_config": {
167
+ "layers": 27,
168
+ "mlp_ratio": 3.7362,
169
+ "model_name": "siglip_so400m_patch14_384",
170
+ "model_type": "vision",
171
+ "patch_size": 14,
172
+ "width": 1152
173
+ }
174
+ }
generation_config.json ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ {
2
+ "_from_model_config": true,
3
+ "transformers_version": "4.47.1"
4
+ }
model-00001-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:816384be37851b60bc1004746f9c85be0ef9620d60e414152fd5d5a1e54469eb
3
+ size 5000833000
model-00002-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8916966202ffeba1e8cd67befa9fd61c046b6592b31ffcced765e59ad010c6bb
3
+ size 5000696320
model-00003-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1f8f1bf0923d4c00a476e4155f884c00df9165dfa6f6591c2d86dac1b523a272
3
+ size 5001230512
model-00004-of-00004.safetensors ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:7743d97e86f01bbd900442ea11fb2d8a199ff9525875a78a2016967af94b5017
3
+ size 984490016
model.safetensors.index.json ADDED
The diff for this file is too large to render. See raw diff
 
processor_config.json ADDED
@@ -0,0 +1,116 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "add_special_token": false,
3
+ "candidate_resolutions": [
4
+ [
5
+ 384,
6
+ 384
7
+ ],
8
+ [
9
+ 384,
10
+ 768
11
+ ],
12
+ [
13
+ 768,
14
+ 384
15
+ ],
16
+ [
17
+ 384,
18
+ 1152
19
+ ],
20
+ [
21
+ 1152,
22
+ 384
23
+ ],
24
+ [
25
+ 384,
26
+ 1536
27
+ ],
28
+ [
29
+ 1536,
30
+ 384
31
+ ],
32
+ [
33
+ 768,
34
+ 768
35
+ ],
36
+ [
37
+ 384,
38
+ 1920
39
+ ],
40
+ [
41
+ 1920,
42
+ 384
43
+ ],
44
+ [
45
+ 384,
46
+ 2304
47
+ ],
48
+ [
49
+ 2304,
50
+ 384
51
+ ],
52
+ [
53
+ 768,
54
+ 1152
55
+ ],
56
+ [
57
+ 1152,
58
+ 768
59
+ ],
60
+ [
61
+ 384,
62
+ 2688
63
+ ],
64
+ [
65
+ 2688,
66
+ 384
67
+ ],
68
+ [
69
+ 384,
70
+ 3072
71
+ ],
72
+ [
73
+ 3072,
74
+ 384
75
+ ],
76
+ [
77
+ 768,
78
+ 1536
79
+ ],
80
+ [
81
+ 1536,
82
+ 768
83
+ ],
84
+ [
85
+ 384,
86
+ 3456
87
+ ],
88
+ [
89
+ 3456,
90
+ 384
91
+ ],
92
+ [
93
+ 1152,
94
+ 1152
95
+ ]
96
+ ],
97
+ "downsample_ratio": 2,
98
+ "ignore_id": -100,
99
+ "image_mean": [
100
+ 0.5,
101
+ 0.5,
102
+ 0.5
103
+ ],
104
+ "image_std": [
105
+ 0.5,
106
+ 0.5,
107
+ 0.5
108
+ ],
109
+ "image_token": "<image>",
110
+ "mask_prompt": false,
111
+ "normalize": true,
112
+ "pad_token": "<\uff5c\u2581pad\u2581\uff5c>",
113
+ "patch_size": 14,
114
+ "processor_class": "DeepseekVLV2Processor",
115
+ "sft_format": "deepseek"
116
+ }
quantization_config.json ADDED
@@ -0,0 +1,24 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "sym": true,
5
+ "data_type": "int",
6
+ "enable_quanted_input": true,
7
+ "enable_minmax_tuning": true,
8
+ "seqlen": 2048,
9
+ "batch_size": 8,
10
+ "scale_dtype": "torch.float16",
11
+ "lr": 0.001,
12
+ "minmax_lr": 0.001,
13
+ "gradient_accumulate_steps": 1,
14
+ "iters": 1000,
15
+ "amp": true,
16
+ "nsamples": 512,
17
+ "low_gpu_mem_usage": true,
18
+ "to_quant_block_names": "language.model.layers",
19
+ "enable_norm_bias_tuning": false,
20
+ "dataset": "NeelNanda/pile-10k",
21
+ "autoround_version": "0.4.3",
22
+ "quant_method": "intel/auto-round",
23
+ "backend": "auto_round:gptq:exllamav2"
24
+ }
special_tokens_map.json ADDED
@@ -0,0 +1,39 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "additional_special_tokens": [
3
+ {
4
+ "content": "<|User|>",
5
+ "lstrip": false,
6
+ "normalized": false,
7
+ "rstrip": false,
8
+ "single_word": false
9
+ },
10
+ {
11
+ "content": "<|Assistant|>",
12
+ "lstrip": false,
13
+ "normalized": false,
14
+ "rstrip": false,
15
+ "single_word": false
16
+ }
17
+ ],
18
+ "bos_token": {
19
+ "content": "<|begin▁of▁sentence|>",
20
+ "lstrip": false,
21
+ "normalized": false,
22
+ "rstrip": false,
23
+ "single_word": false
24
+ },
25
+ "eos_token": {
26
+ "content": "<|end▁of▁sentence|>",
27
+ "lstrip": false,
28
+ "normalized": false,
29
+ "rstrip": false,
30
+ "single_word": false
31
+ },
32
+ "pad_token": {
33
+ "content": "<|▁pad▁|>",
34
+ "lstrip": false,
35
+ "normalized": false,
36
+ "rstrip": false,
37
+ "single_word": false
38
+ }
39
+ }
tokenizer.json ADDED
The diff for this file is too large to render. See raw diff
 
tokenizer_config.json ADDED
The diff for this file is too large to render. See raw diff