n1ck-guo commited on
Commit
fb77034
1 Parent(s): 2e595e8

upload auto_round format

Browse files

Signed-off-by: n1ck-guo <[email protected]>

Files changed (2):
  1. config.json +4 -5
  2. quantize_config.json +0 -25
config.json CHANGED
@@ -139,12 +139,11 @@
139
  "quantization_config": {
140
  "amp": true,
141
  "autoround_version": "0.4.3",
 
142
  "batch_size": 8,
143
  "bits": 4,
144
- "block_name_to_quantize": "language.model.layers",
145
- "damp_percent": 0.01,
146
  "data_type": "int",
147
- "desc_act": false,
148
  "enable_minmax_tuning": true,
149
  "enable_norm_bias_tuning": false,
150
  "enable_quanted_input": true,
@@ -155,11 +154,11 @@
155
  "lr": 0.001,
156
  "minmax_lr": 0.001,
157
  "nsamples": 512,
158
- "quant_method": "gptq",
159
  "scale_dtype": "torch.float16",
160
  "seqlen": 2048,
161
  "sym": true,
162
- "true_sequential": false
163
  },
164
  "tile_tag": "2D",
165
  "torch_dtype": "bfloat16",
 
139
  "quantization_config": {
140
  "amp": true,
141
  "autoround_version": "0.4.3",
142
+ "backend": "auto_round:gptq:exllamav2",
143
  "batch_size": 8,
144
  "bits": 4,
 
 
145
  "data_type": "int",
146
+ "dataset": "NeelNanda/pile-10k",
147
  "enable_minmax_tuning": true,
148
  "enable_norm_bias_tuning": false,
149
  "enable_quanted_input": true,
 
154
  "lr": 0.001,
155
  "minmax_lr": 0.001,
156
  "nsamples": 512,
157
+ "quant_method": "intel/auto-round",
158
  "scale_dtype": "torch.float16",
159
  "seqlen": 2048,
160
  "sym": true,
161
+ "to_quant_block_names": "language.model.layers"
162
  },
163
  "tile_tag": "2D",
164
  "torch_dtype": "bfloat16",
quantize_config.json DELETED
@@ -1,25 +0,0 @@
1
- {
2
- "bits": 4,
3
- "group_size": 128,
4
- "sym": true,
5
- "data_type": "int",
6
- "enable_quanted_input": true,
7
- "enable_minmax_tuning": true,
8
- "seqlen": 2048,
9
- "batch_size": 8,
10
- "scale_dtype": "torch.float16",
11
- "lr": 0.001,
12
- "minmax_lr": 0.001,
13
- "gradient_accumulate_steps": 1,
14
- "iters": 1000,
15
- "amp": true,
16
- "nsamples": 512,
17
- "low_gpu_mem_usage": true,
18
- "enable_norm_bias_tuning": false,
19
- "autoround_version": "0.4.3",
20
- "block_name_to_quantize": "language.model.layers",
21
- "quant_method": "gptq",
22
- "desc_act": false,
23
- "true_sequential": false,
24
- "damp_percent": 0.01
25
- }