sys-lpot-val committed
Commit e1f1fbe · 1 Parent(s): d1bb095

Signed-off-by: sys-lpot-val <[email protected]>

config.json CHANGED
@@ -42,11 +42,11 @@
   "quantization_config": {
     "amp": true,
     "autoround_version": "0.4.0.dev",
+    "backend": "auto_round:gptq:exllamav2",
     "batch_size": 8,
     "bits": 4,
-    "damp_percent": 0.01,
     "data_type": "int",
-    "desc_act": false,
+    "dataset": "NeelNanda/pile-10k",
     "enable_minmax_tuning": true,
     "enable_norm_bias_tuning": false,
     "enable_quanted_input": true,
@@ -58,12 +58,10 @@
     "minmax_lr": 0.001,
     "nsamples": 512,
     "quant_block_list": null,
-    "quant_method": "gptq",
+    "quant_method": "intel/auto-round",
     "scale_dtype": "torch.float16",
     "seqlen": 2048,
-    "sym": true,
-    "true_sequential": false,
-    "block_name_to_quantize": "transformer.encoder.layers"
+    "sym": true
   },
   "rmsnorm": true,
   "rope_ratio": 500,
quantize_config.json → quantization_config.json RENAMED
@@ -17,9 +17,8 @@
   "low_gpu_mem_usage": false,
   "quant_block_list": null,
   "enable_norm_bias_tuning": false,
+  "dataset": "NeelNanda/pile-10k",
   "autoround_version": "0.4.0.dev",
-  "quant_method": "gptq",
-  "desc_act": false,
-  "true_sequential": false,
-  "damp_percent": 0.01
+  "quant_method": "intel/auto-round",
+  "backend": "auto_round:gptq:exllamav2"
   }
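
The values recorded in this file (bits, sym, batch_size, nsamples, seqlen, dataset) mirror AutoRound's quantization arguments. A hedged sketch of how a config like this could be produced with auto-round 0.4.x; the base model id is a placeholder and this is not necessarily the exact command behind this commit:

# Hedged sketch of an AutoRound run that would emit a config like the one above.
from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

base_id = "<base-model-id>"  # hypothetical placeholder
model = AutoModelForCausalLM.from_pretrained(base_id, trust_remote_code=True)
tokenizer = AutoTokenizer.from_pretrained(base_id, trust_remote_code=True)

autoround = AutoRound(
    model, tokenizer,
    bits=4, sym=True,              # matches "bits": 4 and "sym": true
    batch_size=8, nsamples=512,    # matches "batch_size" and "nsamples"
    seqlen=2048,                   # matches "seqlen": 2048
    dataset="NeelNanda/pile-10k",  # matches the recorded calibration dataset
)
autoround.quantize()
# format="auto_round" writes quant_method "intel/auto-round" into the saved config
autoround.save_quantized("./quantized", format="auto_round")
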