winglian committed on
Commit
5b0bc48
·
unverified ·
1 Parent(s): 9ec2077

add mistral e2e tests (#649)

Browse files

* mistral e2e tests

* make sure to enable flash attention for the e2e tests

* use latest transformers full sha

* uninstall first

.github/workflows/tests.yml CHANGED
@@ -69,6 +69,7 @@ jobs:
69
 
70
  - name: Install dependencies
71
  run: |
 
72
  pip3 install -U -e .[flash-attn]
73
  pip3 install -r requirements-tests.txt
74
 
 
69
 
70
  - name: Install dependencies
71
  run: |
72
+ pip3 uninstall -y transformers accelerate
73
  pip3 install -U -e .[flash-attn]
74
  pip3 install -r requirements-tests.txt
75
 
requirements.txt CHANGED
@@ -4,7 +4,7 @@ torch==2.0.1
4
  auto-gptq
5
  packaging
6
  peft @ git+https://github.com/huggingface/peft.git
7
- transformers @ git+https://github.com/huggingface/transformers.git@78dd120
8
  bitsandbytes>=0.41.1
9
  accelerate @ git+https://github.com/huggingface/accelerate@80da9cfb09bb3cc9f1b385cb55d6b90d025a5fd9
10
  deepspeed
 
4
  auto-gptq
5
  packaging
6
  peft @ git+https://github.com/huggingface/peft.git
7
+ transformers @ git+https://github.com/huggingface/transformers.git@5e11d72d4d0939138fbabfebe9a69d2061519547
8
  bitsandbytes>=0.41.1
9
  accelerate @ git+https://github.com/huggingface/accelerate@80da9cfb09bb3cc9f1b385cb55d6b90d025a5fd9
10
  deepspeed
tests/e2e/test_mistral.py ADDED
@@ -0,0 +1,208 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ """
2
+ E2E tests for mistral (lora and full fine-tune)
3
+ """
4
+
5
+ import logging
6
+ import os
7
+ import tempfile
8
+ import unittest
9
+ from pathlib import Path
10
+
11
+ from transformers.utils import is_torch_bf16_gpu_available
12
+
13
+ from axolotl.cli import load_datasets
14
+ from axolotl.common.cli import TrainerCliArgs
15
+ from axolotl.train import train
16
+ from axolotl.utils.config import normalize_config
17
+ from axolotl.utils.dict import DictDefault
18
+
19
+ LOG = logging.getLogger("axolotl.tests.e2e")
20
+ os.environ["WANDB_DISABLED"] = "true"
21
+
22
+
23
class TestMistral(unittest.TestCase):
    """
    Test case for Mistral models using LoRA and full fine-tuning
    """

    # Adapter settings shared by the two LoRA tests.
    _LORA_SETTINGS = {
        "load_in_8bit": True,
        "adapter": "lora",
        "lora_r": 32,
        "lora_alpha": 64,
        "lora_dropout": 0.05,
        "lora_target_linear": True,
    }

    def _make_output_dir(self):
        """
        Create a temporary output directory and return its path.

        The directory is registered with ``addCleanup`` so it is removed when
        the test finishes, whether the test passes or fails.
        """
        tmp_dir = tempfile.TemporaryDirectory()  # pylint: disable=consider-using-with
        self.addCleanup(tmp_dir.cleanup)
        return tmp_dir.name

    @staticmethod
    def _build_cfg(output_dir, **overrides):
        """
        Build the config shared by every test in this class.

        ``overrides`` are merged on top of the common settings, so each test
        only spells out what makes it different (LoRA, sample packing, ...).
        """
        # pylint: disable=duplicate-code
        settings = {
            "base_model": "openaccess-ai-collective/tiny-mistral",
            "base_model_config": "openaccess-ai-collective/tiny-mistral",
            "flash_attention": True,
            "sequence_len": 1024,
            "val_set_size": 0.1,
            "special_tokens": {
                "unk_token": "<unk>",
                "bos_token": "<s>",
                "eos_token": "</s>",
            },
            "datasets": [
                {
                    "path": "mhenrichsen/alpaca_2k_test",
                    "type": "alpaca",
                },
            ],
            "num_epochs": 2,
            "micro_batch_size": 2,
            "gradient_accumulation_steps": 1,
            "output_dir": output_dir,
            "learning_rate": 0.00001,
            "optimizer": "adamw_torch",
            "lr_scheduler": "cosine",
            "max_steps": 20,
            "save_steps": 10,
            "eval_steps": 10,
        }
        settings.update(overrides)
        return DictDefault(settings)

    @staticmethod
    def _set_half_precision(cfg):
        """Enable bf16 when the GPU reports support for it, otherwise fp16."""
        if is_torch_bf16_gpu_available():
            cfg.bf16 = True
        else:
            cfg.fp16 = True

    @staticmethod
    def _run_training(cfg):
        """Normalize the config, load the datasets and run training."""
        normalize_config(cfg)
        cli_args = TrainerCliArgs()
        dataset_meta = load_datasets(cfg=cfg, cli_args=cli_args)
        train(cfg=cfg, cli_args=cli_args, dataset_meta=dataset_meta)

    def _assert_saved(self, output_dir, filename):
        """Fail with a descriptive message if ``filename`` is not in ``output_dir``."""
        saved_path = Path(output_dir) / filename
        self.assertTrue(saved_path.exists(), f"{saved_path} was not written")

    def test_lora(self):
        output_dir = self._make_output_dir()
        cfg = self._build_cfg(output_dir, **self._LORA_SETTINGS)

        self._run_training(cfg)
        self._assert_saved(output_dir, "adapter_model.bin")

    def test_lora_packing(self):
        output_dir = self._make_output_dir()
        cfg = self._build_cfg(
            output_dir,
            sample_packing=True,
            **self._LORA_SETTINGS,
        )

        self._run_training(cfg)
        self._assert_saved(output_dir, "adapter_model.bin")

    def test_ft(self):
        output_dir = self._make_output_dir()
        cfg = self._build_cfg(output_dir)
        # Precision flags are set before normalize_config runs, as in the
        # original test.
        self._set_half_precision(cfg)

        self._run_training(cfg)
        self._assert_saved(output_dir, "pytorch_model.bin")

    def test_ft_packing(self):
        output_dir = self._make_output_dir()
        cfg = self._build_cfg(output_dir, sample_packing=True)
        # Precision flags are set before normalize_config runs, as in the
        # original test.
        self._set_half_precision(cfg)

        self._run_training(cfg)
        self._assert_saved(output_dir, "pytorch_model.bin")