winglian committed
Commit a125693 · 1 Parent(s): 709be5a

add support for trust_remote_code for mpt models

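For context (not part of this commit): MPT's model class ships as custom code in the Hub repo rather than inside transformers, so loading it through the Auto classes requires `trust_remote_code=True`. A minimal standalone sketch of what this commit makes configurable:

```python
# Minimal standalone sketch (assumes Hub access); not code from this commit.
from transformers import AutoModelForCausalLM, AutoTokenizer

model = AutoModelForCausalLM.from_pretrained(
    "mosaicml/mpt-7b",
    trust_remote_code=True,  # runs the custom MPT modeling code shipped with the repo
)
tokenizer = AutoTokenizer.from_pretrained("mosaicml/mpt-7b")
```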
examples/mpt-7b/README.md ADDED
@@ -0,0 +1,6 @@
+ # MPT-7B
+
+ ```shell
+ accelerate launch scripts/finetune.py examples/mpt-7b/config.yml
+
+ ```
examples/mpt-7b/config.yml ADDED
@@ -0,0 +1,59 @@
+ base_model: mosaicml/mpt-7b
+ base_model_config: mosaicml/mpt-7b
+ model_type: AutoModelForCausalLM
+ tokenizer_type: GPTNeoXTokenizer
+ trust_remote_code: true # required for mpt as their model class is not merged into transformers yet
+ load_in_8bit: false
+ datasets:
+   - path: vicgalle/alpaca-gpt4
+     type: alpaca
+ dataset_prepared_path: last_run_prepared
+ val_set_size: 0.02
+ adapter:
+ lora_model_dir:
+ sequence_len: 2048
+ max_packed_sequence_len:
+ lora_r: 8
+ lora_alpha: 16
+ lora_dropout: 0.05
+ lora_target_modules:
+   - q_proj
+   - v_proj
+ lora_fan_in_fan_out: false
+ wandb_project: mpt-alpaca-7b
+ wandb_watch:
+ wandb_run_id:
+ wandb_log_model: checkpoint
+ output_dir: ./mpt-alpaca-7b
+ batch_size: 4
+ micro_batch_size: 1
+ num_epochs: 3
+ optimizer: adamw_bnb_8bit
+ torchdistx_path:
+ lr_scheduler: cosine
+ learning_rate: 0.0000002
+ train_on_inputs: false
+ group_by_length: false
+ bf16: true
+ tf32: true
+ early_stopping_patience:
+ resume_from_checkpoint:
+ local_rank:
+ logging_steps: 5
+ xformers_attention:
+ flash_attention:
+ gptq_groupsize:
+ gptq_model_v1:
+ warmup_steps: 20
+ eval_steps: 110
+ save_steps: 660
+ debug:
+ deepspeed:
+ weight_decay: 0.0001
+ fsdp:
+ fsdp_config:
+ special_tokens:
+   pad_token: "<|padding|>"
+   bos_token: "<|endoftext|>"
+   eos_token: "<|endoftext|>"
+   unk_token: "<|endoftext|>"
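As a rough illustration (an assumption about how the loader consumes this file, not code from this commit), the `special_tokens` block above corresponds to registering the GPT-NeoX style tokens on the tokenizer before training:

```python
# Hypothetical sketch of how the special_tokens block maps onto the tokenizer;
# the actual wiring lives in axolotl's loading code.
from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("mosaicml/mpt-7b")
tokenizer.add_special_tokens(
    {
        "pad_token": "<|padding|>",
        "bos_token": "<|endoftext|>",
        "eos_token": "<|endoftext|>",
        "unk_token": "<|endoftext|>",
    }
)
```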
src/axolotl/utils/models.py CHANGED
@@ -113,6 +113,7 @@ def load_model(
            load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
            torch_dtype=torch_dtype,
            device_map=cfg.device_map,
+           trust_remote_code=True if cfg.trust_remote_code is True else False,
        )
    else:
        model = AutoModelForCausalLM.from_pretrained(
@@ -120,6 +121,7 @@ def load_model(
            load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
            torch_dtype=torch_dtype,
            device_map=cfg.device_map,
+           trust_remote_code=True if cfg.trust_remote_code is True else False,
        )
    except Exception as e:
        logging.error(
@@ -131,6 +133,7 @@ def load_model(
            load_in_8bit=cfg.load_in_8bit and cfg.adapter is not None,
            torch_dtype=torch_dtype,
            device_map=cfg.device_map,
+           trust_remote_code=True if cfg.trust_remote_code is True else False,
        )

    if not tokenizer:
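One note on the added guard: assuming the config object reads missing keys as None (an assumption about axolotl's config wrapper, not stated in this commit), `True if cfg.trust_remote_code is True else False` only enables remote code when the option is explicitly set to a boolean true. A small sketch of that behavior:

```python
# Small sketch under the assumed config behavior: missing keys read as None,
# so only an explicit boolean true turns on trust_remote_code.
class Cfg(dict):
    def __getattr__(self, name):
        return self.get(name)  # missing keys -> None

print(True if Cfg(trust_remote_code=True).trust_remote_code is True else False)  # True
print(True if Cfg().trust_remote_code is True else False)                        # False
print(True if Cfg(trust_remote_code="yes").trust_remote_code is True else False) # False
```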