mgoin committed · Commit f68d0d6 · verified · 1 Parent(s): 142a46f

Create README.md

Files changed (1): README.md (+38 −0)
README.md ADDED
@@ -0,0 +1,38 @@
Quantized using AutoFP8 with the following script:

```python
import os

from transformers import AutoTokenizer

import auto_fp8
from auto_fp8 import AutoFP8ForCausalLM, BaseQuantizeConfig

pretrained_model_dir = "ibm-granite/granite-20b-code-base"
quantized_model_dir = "granite-20b-code-base-FP8"

tokenizer = AutoTokenizer.from_pretrained(pretrained_model_dir, use_fast=True)

# Calibrate on code: use the source of auto_fp8's own quantize.py as the
# calibration text, since this is a code model.
seed_text_file = os.path.join(os.path.dirname(auto_fp8.__file__), "quantize.py")

with open(seed_text_file, "r") as f:
    text = f.read()

examples = tokenizer([text], return_tensors="pt").to("cuda")

# FP8 weights with static activation scales; keep lm_head unquantized.
quantize_config = BaseQuantizeConfig(
    quant_method="fp8",
    activation_scheme="static",
    ignore_patterns=["re:.*lm_head"],
)

model = AutoFP8ForCausalLM.from_pretrained(
    pretrained_model_dir, quantize_config=quantize_config
)

model.quantize(examples)
model.save_quantized(quantized_model_dir)
```
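
The saved checkpoint is intended for FP8 serving. A minimal inference sketch using vLLM, assuming a vLLM build with FP8 checkpoint support (the prompt is illustrative and not from the original script):

```python
# Minimal sketch, assuming a vLLM build that can load FP8 checkpoints.
from vllm import LLM, SamplingParams

llm = LLM(model="granite-20b-code-base-FP8")  # directory written by save_quantized above
outputs = llm.generate(["def fibonacci(n):"], SamplingParams(max_tokens=64))
print(outputs[0].outputs[0].text)
```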