Brandon Royal committed on
Commit
044a024
·
1 Parent(s): 329b131

updated readme

Browse files
Files changed (1) hide show
  1. README.md +7 -60
README.md CHANGED
@@ -57,22 +57,6 @@ You can find fine-tuning notebooks under the [`examples/` directory](https://hug
57
  * A script to perform SFT using FSDP on TPU devices
58
  * A notebook that you can run on a free-tier Google Colab instance to perform SFT on English quotes dataset. You can also find the copy of the notebook [here](https://github.com/huggingface/notebooks/blob/main/peft/gemma_7b_english_quotes.ipynb).
59
 
60
- #### Running the model on a CPU
61
-
62
-
63
- ```python
64
- from transformers import AutoTokenizer, AutoModelForCausalLM
65
-
66
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")
67
- model = AutoModelForCausalLM.from_pretrained("google/gemma-7b")
68
-
69
- input_text = "Write me a poem about Machine Learning."
70
- input_ids = tokenizer(input_text, return_tensors="pt")
71
-
72
- outputs = model.generate(**input_ids)
73
- print(tokenizer.decode(outputs[0]))
74
- ```
75
-
76
 
77
  #### Running the model on a single / multi GPU
78
 
@@ -81,8 +65,8 @@ print(tokenizer.decode(outputs[0]))
81
  # pip install accelerate
82
  from transformers import AutoTokenizer, AutoModelForCausalLM
83
 
84
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")
85
- model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", device_map="auto")
86
 
87
  input_text = "Write me a poem about Machine Learning."
88
  input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
@@ -99,9 +83,10 @@ print(tokenizer.decode(outputs[0]))
99
  ```python
100
  # pip install accelerate
101
  from transformers import AutoTokenizer, AutoModelForCausalLM
 
102
 
103
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")
104
- model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", device_map="auto", torch_dtype=torch.float16)
105
 
106
  input_text = "Write me a poem about Machine Learning."
107
  input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
@@ -116,46 +101,8 @@ print(tokenizer.decode(outputs[0]))
116
  # pip install accelerate
117
  from transformers import AutoTokenizer, AutoModelForCausalLM
118
 
119
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")
120
- model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", device_map="auto", torch_dtype=torch.bfloat16)
121
-
122
- input_text = "Write me a poem about Machine Learning."
123
- input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
124
-
125
- outputs = model.generate(**input_ids)
126
- print(tokenizer.decode(outputs[0]))
127
- ```
128
-
129
- #### Quantized Versions through `bitsandbytes`
130
-
131
- * _Using 8-bit precision (int8)_
132
-
133
- ```python
134
- # pip install bitsandbytes accelerate
135
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
136
-
137
- quantization_config = BitsAndBytesConfig(load_in_8bit=True)
138
-
139
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")
140
- model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", quantization_config=quantization_config)
141
-
142
- input_text = "Write me a poem about Machine Learning."
143
- input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
144
-
145
- outputs = model.generate(**input_ids)
146
- print(tokenizer.decode(outputs[0]))
147
- ```
148
-
149
- * _Using 4-bit precision_
150
-
151
- ```python
152
- # pip install bitsandbytes accelerate
153
- from transformers import AutoTokenizer, AutoModelForCausalLM, BitsAndBytesConfig
154
-
155
- quantization_config = BitsAndBytesConfig(load_in_4bit=True)
156
-
157
- tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b")
158
- model = AutoModelForCausalLM.from_pretrained("google/gemma-7b", quantization_config=quantization_config)
159
 
160
  input_text = "Write me a poem about Machine Learning."
161
  input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
 
57
  * A script to perform SFT using FSDP on TPU devices
58
  * A notebook that you can run on a free-tier Google Colab instance to perform SFT on English quotes dataset. You can also find the copy of the notebook [here](https://github.com/huggingface/notebooks/blob/main/peft/gemma_7b_english_quotes.ipynb).
59
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
60
 
61
  #### Running the model on a single / multi GPU
62
 
 
65
  # pip install accelerate
66
  from transformers import AutoTokenizer, AutoModelForCausalLM
67
 
68
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b-AWQ")
69
+ model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-AWQ", device_map="auto")
70
 
71
  input_text = "Write me a poem about Machine Learning."
72
  input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
 
83
  ```python
84
  # pip install accelerate
85
  from transformers import AutoTokenizer, AutoModelForCausalLM
86
+ import torch
87
 
88
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b-AWQ")
89
+ model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-AWQ", device_map="auto", torch_dtype=torch.float16)
90
 
91
  input_text = "Write me a poem about Machine Learning."
92
  input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")
 
101
  # pip install accelerate
102
  from transformers import AutoTokenizer, AutoModelForCausalLM
103
 
104
+ tokenizer = AutoTokenizer.from_pretrained("google/gemma-7b-AWQ")
105
+ model = AutoModelForCausalLM.from_pretrained("google/gemma-7b-AWQ", device_map="auto", torch_dtype=torch.bfloat16)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
106
 
107
  input_text = "Write me a poem about Machine Learning."
108
  input_ids = tokenizer(input_text, return_tensors="pt").to("cuda")