upload auto_round format
Signed-off-by: n1ck-guo <[email protected]>
- config.json +4 -5
- quantize_config.json +0 -25
config.json
CHANGED
@@ -139,12 +139,11 @@
   "quantization_config": {
     "amp": true,
     "autoround_version": "0.4.3",
+    "backend": "auto_round:gptq:exllamav2",
     "batch_size": 8,
     "bits": 4,
-    "block_name_to_quantize": "language.model.layers",
-    "damp_percent": 0.01,
     "data_type": "int",
-    "desc_act": false,
+    "dataset": "NeelNanda/pile-10k",
     "enable_minmax_tuning": true,
     "enable_norm_bias_tuning": false,
     "enable_quanted_input": true,
@@ -155,11 +154,11 @@
     "lr": 0.001,
     "minmax_lr": 0.001,
     "nsamples": 512,
-    "quant_method": "gptq",
+    "quant_method": "intel/auto-round",
    "scale_dtype": "torch.float16",
     "seqlen": 2048,
     "sym": true,
-    "true_sequential": false
+    "to_quant_block_names": "language.model.layers"
   },
   "tile_tag": "2D",
   "torch_dtype": "bfloat16",
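With this change the quantization_config declares quant_method "intel/auto-round" and backend "auto_round:gptq:exllamav2", so the checkpoint is loaded through the auto-round integration rather than a plain GPTQ loader. A minimal loading sketch under assumptions: the repo id is a hypothetical placeholder, the prompt is illustrative, and the use of AutoModelForCausalLM with trust_remote_code is a guess at the right entry point for this architecture.

from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRoundConfig  # importing auto_round registers the "intel/auto-round" quant_method with transformers

model_id = "path/to/this-quantized-repo"  # hypothetical placeholder for the checkpoint in this repo
model = AutoModelForCausalLM.from_pretrained(
    model_id,
    device_map="auto",
    torch_dtype="auto",
    trust_remote_code=True,  # the config hints at a custom (vision-language) architecture
)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

prompt = "There is a girl who likes adventure,"
inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
print(tokenizer.decode(model.generate(**inputs, max_new_tokens=50)[0]))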
quantize_config.json
DELETED
@@ -1,25 +0,0 @@
-{
-  "bits": 4,
-  "group_size": 128,
-  "sym": true,
-  "data_type": "int",
-  "enable_quanted_input": true,
-  "enable_minmax_tuning": true,
-  "seqlen": 2048,
-  "batch_size": 8,
-  "scale_dtype": "torch.float16",
-  "lr": 0.001,
-  "minmax_lr": 0.001,
-  "gradient_accumulate_steps": 1,
-  "iters": 1000,
-  "amp": true,
-  "nsamples": 512,
-  "low_gpu_mem_usage": true,
-  "enable_norm_bias_tuning": false,
-  "autoround_version": "0.4.3",
-  "block_name_to_quantize": "language.model.layers",
-  "quant_method": "gptq",
-  "desc_act": false,
-  "true_sequential": false,
-  "damp_percent": 0.01
-}
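Dropping quantize_config.json removes the GPTQ-specific sidecar file; in the auto_round export format these settings are carried only in config.json's quantization_config. A rough sketch of how such an export is produced with the auto-round library (0.4.x-style API): the model id and output directory are hypothetical placeholders, and the keyword selection simply mirrors the settings shown above rather than a verified recipe.

from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

model_id = "path/to/original-bf16-model"  # hypothetical placeholder for the unquantized checkpoint
model = AutoModelForCausalLM.from_pretrained(model_id, torch_dtype="auto", trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(model_id, trust_remote_code=True)

autoround = AutoRound(
    model,
    tokenizer,
    bits=4,
    group_size=128,
    sym=True,
    seqlen=2048,
    batch_size=8,
    nsamples=512,
    iters=1000,
    dataset="NeelNanda/pile-10k",
)
autoround.quantize()

# format="auto_round" stores everything under quantization_config in config.json,
# whereas format="auto_gptq" additionally writes a separate quantize_config.json
autoround.save_quantized("./model-autoround", format="auto_round", inplace=True)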