Crystalcareai committed · verified · Commit fb98720 · 1 Parent(s): 0b90483

Update modeling_quiet.py

Files changed (1)
  1. modeling_quiet.py +5 -5
modeling_quiet.py CHANGED
@@ -60,7 +60,7 @@ def model_init(params):
     trust_remote_code=True,
     device_map="auto",
     # load_in_4bit=True,
-    # attn_implementation="flash_attention_2",
+    # attn_implementation="flash_attention_2", #flash-attn currently unsupported.
     )
     print("Loaded model")
 
@@ -115,7 +115,7 @@ training_args = TrainingArguments(
     # beta2=0.95,
     # auto_find_batch_size=True
     learning_rate=2e-07,
-    max_grad_norm=1.0, # Gradient clipping with a maximum gradient norm of 0.3
+    max_grad_norm=1.0,
     warmup_steps=10,
     lr_scheduler_type="cosine",
     push_to_hub=False,
@@ -125,12 +125,12 @@ training_args = TrainingArguments(
 
 # Training is currently bugged with lora/qlora
 # peft_config = LoraConfig(
-# r = 16, # Choose any number > 0 ! Suggested 8, 16, 32, 64, 128
+# r = 16,
 # target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
 # "gate_proj", "up_proj", "down_proj",],
 # lora_alpha = 16,
-# lora_dropout = 0, # Supports any, but = 0 is optimized
-# bias = "none", # Enable Dora method
+# lora_dropout = 0,
+# bias = "none",
 # use_dora=False,
 # )
 
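
For context, here is a minimal, assumption-based sketch of the model-loading call that the first hunk touches. It is not code from this commit: the model id and the return statement are placeholders, and only the keyword arguments visible in the diff above are taken from the source.

from transformers import AutoModelForCausalLM

def model_init(params):
    # Sketch only: "your-org/your-model" is a placeholder, not the repo's actual checkpoint.
    model = AutoModelForCausalLM.from_pretrained(
        "your-org/your-model",
        trust_remote_code=True,
        device_map="auto",
        # load_in_4bit=True,
        # attn_implementation="flash_attention_2",  # flash-attn currently unsupported.
    )
    print("Loaded model")
    return model

Per the diff's own note, the attn_implementation kwarg stays commented out because flash-attn is currently unsupported for this model.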