Yoshiii committed on
Commit
09125e9
·
1 Parent(s): 0d905f2

Create README.md

Browse files
Files changed (1) hide show
  1. README.md +160 -0
README.md ADDED
@@ -0,0 +1,160 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ ---
2
+ license: unlicense
3
+ ---
4
+ Running opt-6.7b with added LoRAs locally on Windows!
5
+
6
+ # bitsandbytes
7
+
8
+ I needed to get bitsandbytes working in my venv:
9
+ I replaced C:\Users\user\Desktop\test\peft\venv\Lib\site-packages\bitsandbytes\cuda_setup\main.py with the main.py provided here!
10
+ I also added a .dll file here: C:\Users\user\Desktop\test\peft\venv\Lib\site-packages\bitsandbytes\libbitsandbytes_cuda116.dll
11
+
12
+
13
+
14
+ # Training Script
15
+
16
+ Source: https://github.com/huggingface/peft/commit/df0e1fb59266c9903ddd6dbfe7339bcd2068d150 (it's from their notebook!)
17
+
18
+ ```
19
+ #load
20
+
21
+
22
+ import os
23
+ os.environ["CUDA_VISIBLE_DEVICES"]="0"
24
+ import torch
25
+ import torch.nn as nn
26
+ import bitsandbytes as bnb
27
+ from transformers import AutoTokenizer, AutoConfig, AutoModelForCausalLM
28
+
29
+ model = AutoModelForCausalLM.from_pretrained(
30
+ "facebook/opt-6.7b",
31
+ load_in_8bit=True,
32
+ device_map='auto',
33
+ )
34
+
35
+ tokenizer = AutoTokenizer.from_pretrained("facebook/opt-6.7b")
36
+
37
+
38
+ #post-processing
39
+
40
+ for param in model.parameters():
41
+ param.requires_grad = False # freeze the model - train adapters later
42
+ if param.ndim == 1:
43
+ # cast the small parameters (e.g. layernorm) to fp32 for stability
44
+ param.data = param.data.to(torch.float32)
45
+
46
+ model.gradient_checkpointing_enable() # reduce number of stored activations
47
+ model.enable_input_require_grads()
48
+
49
class CastOutputToFloat(nn.Sequential):
    """Sequential container whose forward output is cast to ``torch.float32``."""

    def forward(self, x):
        # Run the wrapped modules unchanged, then upcast the result to fp32.
        result = super().forward(x)
        return result.to(torch.float32)
51
+ model.lm_head = CastOutputToFloat(model.lm_head)
52
+
53
+ # apply lora
54
+
55
def print_trainable_parameters(model):
    """
    Prints the number of trainable parameters in the model.

    Walks ``model.named_parameters()``, summing ``numel()`` over every
    parameter and, separately, over those with ``requires_grad`` set, then
    prints both totals and the trainable share as a percentage.

    Args:
        model: Any object exposing ``named_parameters()`` that yields
            ``(name, parameter)`` pairs whose parameters provide ``numel()``
            and ``requires_grad``.
    """
    trainable_params = 0
    all_param = 0
    for _, param in model.named_parameters():
        count = param.numel()
        all_param += count
        if param.requires_grad:
            trainable_params += count
    # A model without any parameters would otherwise raise ZeroDivisionError;
    # report 0.0 percent in that case instead of crashing.
    trainable_pct = 100 * trainable_params / all_param if all_param else 0.0
    print(
        f"trainable params: {trainable_params} || all params: {all_param} || trainable%: {trainable_pct}"
    )
68
+
69
+ # apply lora 2
70
+
71
+ from peft import LoraConfig, get_peft_model
72
+
73
+ config = LoraConfig(
74
+ r=16,
75
+ lora_alpha=32,
76
+ target_modules=["q_proj", "v_proj"],
77
+ lora_dropout=0.05,
78
+ bias="none",
79
+ task_type="CAUSAL_LM"
80
+ )
81
+
82
+ model = get_peft_model(model, config)
83
+ print_trainable_parameters(model)
84
+
85
+ # training
86
+
87
+ import transformers
88
+ from datasets import load_dataset
89
+ data = load_dataset("Abirate/english_quotes")
90
+ data = data.map(lambda samples: tokenizer(samples['quote']), batched=True)
91
+
92
+ trainer = transformers.Trainer(
93
+ model=model,
94
+ train_dataset=data['train'],
95
+ args=transformers.TrainingArguments(
96
+ per_device_train_batch_size=4,
97
+ gradient_accumulation_steps=4,
98
+ warmup_steps=100,
99
+ max_steps=200,
100
+ learning_rate=2e-4,
101
+ fp16=True,
102
+ logging_steps=1,
103
+ output_dir='outputs'
104
+ ),
105
+ data_collator=transformers.DataCollatorForLanguageModeling(tokenizer, mlm=False)
106
+ )
107
+ model.config.use_cache = False # silence the warnings. Please re-enable for inference!
108
+ trainer.train()
109
+
110
+ # push to huggingface txtloras
111
+ model.push_to_hub("Yoshiii/opt-6.7b-lora", use_auth_token=True)
112
+
113
+
114
+ # inference
115
+
116
+ batch = tokenizer("Two things are infinite: ", return_tensors='pt')
117
+
118
+ with torch.cuda.amp.autocast():
119
+ output_tokens = model.generate(**batch, max_new_tokens=50)
120
+
121
+ print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))
122
+ ```
123
+
124
+
125
+
126
+
127
+
128
+
129
+
130
+ # Inference (loading this repo's LoRA from the Hugging Face Hub)
131
+
132
+ ```
133
+ import torch
134
+ from peft import PeftModel, PeftConfig
135
+ from transformers import AutoModelForCausalLM, AutoTokenizer
136
+
137
+ peft_model_id = "Yoshiii/opt-6.7b-lora"
138
+ config = PeftConfig.from_pretrained(peft_model_id)
139
+ model = AutoModelForCausalLM.from_pretrained(config.base_model_name_or_path, return_dict=True, load_in_8bit=True, device_map='auto')
140
+ tokenizer = AutoTokenizer.from_pretrained(config.base_model_name_or_path)
141
+
142
+ # Load the Lora model
143
+ model = PeftModel.from_pretrained(model, peft_model_id)
144
+
145
+
146
+ batch = tokenizer("Two things are infinite: ", return_tensors='pt')
147
+
148
+ with torch.cuda.amp.autocast():
149
+ output_tokens = model.generate(**batch, max_new_tokens=50)
150
+
151
+ print('\n\n', tokenizer.decode(output_tokens[0], skip_special_tokens=True))
152
+ ```
153
+
154
+ Two things are infinite: the universe and human stupidity; and I'm not sure about the universe. -Albert Einstein I'm not sure about the universe either.
155
+
156
+
157
+ This output resembles the training data. If you run the model without applying the LoRA, the output will usually look worse. If you retrain the LoRA, note that the new LoRA will not produce the same results, even if you use the same settings.
158
+ Inference should usually be deterministic when using the same LoRA, or when running without a LoRA.
159
+
160
+