Crystalcareai committed: Update modeling_quiet.py

modeling_quiet.py CHANGED (+5 -5)
@@ -60,7 +60,7 @@ def model_init(params):
         trust_remote_code=True,
         device_map="auto",
         # load_in_4bit=True,
-        # attn_implementation="flash_attention_2",
+        # attn_implementation="flash_attention_2", #flash-attn currently unsupported.
     )
     print("Loaded model")
 
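For context, the keyword arguments in this hunk belong to the model load call inside model_init. A minimal sketch of the surrounding call, assuming transformers' AutoModelForCausalLM.from_pretrained and a placeholder model id (the id, the variable names, and the return are assumptions; only the keyword arguments appear in the diff):

from transformers import AutoModelForCausalLM

def model_init(params):
    model = AutoModelForCausalLM.from_pretrained(
        "org/quiet-star-model",  # placeholder id: the real checkpoint is not shown in the diff
        trust_remote_code=True,  # modeling_quiet.py ships custom model code, so remote code must be trusted
        device_map="auto",
        # load_in_4bit=True,
        # attn_implementation="flash_attention_2", #flash-attn currently unsupported.
    )
    print("Loaded model")
    return model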
@@ -115,7 +115,7 @@ training_args = TrainingArguments(
     # beta2=0.95,
     # auto_find_batch_size=True
     learning_rate=2e-07,
-    max_grad_norm=1.0,
+    max_grad_norm=1.0,
     warmup_steps=10,
     lr_scheduler_type="cosine",
     push_to_hub=False,
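The hyperparameters in this hunk sit inside a transformers TrainingArguments block. A minimal sketch using only the values visible in the diff (output_dir and every argument not shown here are assumptions):

from transformers import TrainingArguments

training_args = TrainingArguments(
    output_dir="./output",      # assumption: not shown in the diff
    learning_rate=2e-07,
    max_grad_norm=1.0,          # clip gradient norm at 1.0
    warmup_steps=10,
    lr_scheduler_type="cosine",
    push_to_hub=False,
)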
@@ -125,12 +125,12 @@ training_args = TrainingArguments(
 
 # Training is currently bugged with lora/qlora
 # peft_config = LoraConfig(
-# r = 16,
+# r = 16,
 # target_modules = ["q_proj", "k_proj", "v_proj", "o_proj",
 # "gate_proj", "up_proj", "down_proj",],
 # lora_alpha = 16,
-# lora_dropout = 0,
-# bias = "none",
+# lora_dropout = 0,
+# bias = "none",
 # use_dora=False,
 # )
 
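The disabled block is a peft LoraConfig; the commit leaves it commented out because, per the comment, training is currently bugged with lora/qlora. If re-enabled, it would presumably be wired in along these lines (the get_peft_model call is an assumption, not shown in the file):

from peft import LoraConfig, get_peft_model

peft_config = LoraConfig(
    r=16,                       # LoRA rank
    target_modules=["q_proj", "k_proj", "v_proj", "o_proj",
                    "gate_proj", "up_proj", "down_proj"],
    lora_alpha=16,
    lora_dropout=0,
    bias="none",
    use_dora=False,
)
# model = get_peft_model(model, peft_config)  # assumption: kept disabled while lora/qlora training is bugged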