n1ck-guo committed on
Commit
2e595e8
1 Parent(s): e04448b

upload auto_gptq format

Browse files

Signed-off-by: n1ck-guo <[email protected]>

Files changed (2) hide show
  1. config.json +5 -4
  2. quantize_config.json +25 -0
config.json CHANGED
@@ -139,11 +139,12 @@
139
  "quantization_config": {
140
  "amp": true,
141
  "autoround_version": "0.4.3",
142
- "backend": "auto_round:gptq:exllamav2",
143
  "batch_size": 8,
144
  "bits": 4,
 
 
145
  "data_type": "int",
146
- "dataset": "NeelNanda/pile-10k",
147
  "enable_minmax_tuning": true,
148
  "enable_norm_bias_tuning": false,
149
  "enable_quanted_input": true,
@@ -154,11 +155,11 @@
154
  "lr": 0.001,
155
  "minmax_lr": 0.001,
156
  "nsamples": 512,
157
- "quant_method": "intel/auto-round",
158
  "scale_dtype": "torch.float16",
159
  "seqlen": 2048,
160
  "sym": true,
161
- "to_quant_block_names": "language.model.layers"
162
  },
163
  "tile_tag": "2D",
164
  "torch_dtype": "bfloat16",
 
139
  "quantization_config": {
140
  "amp": true,
141
  "autoround_version": "0.4.3",
 
142
  "batch_size": 8,
143
  "bits": 4,
144
+ "block_name_to_quantize": "language.model.layers",
145
+ "damp_percent": 0.01,
146
  "data_type": "int",
147
+ "desc_act": false,
148
  "enable_minmax_tuning": true,
149
  "enable_norm_bias_tuning": false,
150
  "enable_quanted_input": true,
 
155
  "lr": 0.001,
156
  "minmax_lr": 0.001,
157
  "nsamples": 512,
158
+ "quant_method": "gptq",
159
  "scale_dtype": "torch.float16",
160
  "seqlen": 2048,
161
  "sym": true,
162
+ "true_sequential": false
163
  },
164
  "tile_tag": "2D",
165
  "torch_dtype": "bfloat16",
quantize_config.json ADDED
@@ -0,0 +1,25 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ {
2
+ "bits": 4,
3
+ "group_size": 128,
4
+ "sym": true,
5
+ "data_type": "int",
6
+ "enable_quanted_input": true,
7
+ "enable_minmax_tuning": true,
8
+ "seqlen": 2048,
9
+ "batch_size": 8,
10
+ "scale_dtype": "torch.float16",
11
+ "lr": 0.001,
12
+ "minmax_lr": 0.001,
13
+ "gradient_accumulate_steps": 1,
14
+ "iters": 1000,
15
+ "amp": true,
16
+ "nsamples": 512,
17
+ "low_gpu_mem_usage": true,
18
+ "enable_norm_bias_tuning": false,
19
+ "autoround_version": "0.4.3",
20
+ "block_name_to_quantize": "language.model.layers",
21
+ "quant_method": "gptq",
22
+ "desc_act": false,
23
+ "true_sequential": false,
24
+ "damp_percent": 0.01
25
+ }