winglian committed on
Commit
a10a826
·
1 Parent(s): 9105935

fix log sweep lr

Browse files
src/axolotl/utils/schedulers.py CHANGED
@@ -19,16 +19,15 @@ class InterpolatingLogScheduler(LRScheduler):
19
  self.num_steps = num_steps
20
  self.min_lr = min_lr
21
  self.max_lr = max_lr
22
- self.q = (max_lr / min_lr) ** (1 / num_steps - 1)
23
  super().__init__(optimizer, last_epoch)
24
 
25
  def get_lr(self):
26
- if self.last_epoch == 0:
27
- lr = self.min_lr
28
  elif self.last_epoch < self.num_steps:
29
- # FIXME, not perfect as we need to account for number of steps are in an epoch, etc
30
- lr = self.min_lr * (self.q ** self.last_epoch)
31
  else:
32
- lr = self.max_lr
33
 
34
- return [lr for _ in self.base_lrs]
 
19
  self.num_steps = num_steps
20
  self.min_lr = min_lr
21
  self.max_lr = max_lr
22
+ self.q = (max_lr / min_lr) ** (1 / (num_steps - 1))
23
  super().__init__(optimizer, last_epoch)
24
 
25
  def get_lr(self):
26
+ if self.last_epoch <= 0:
27
+ lrs = [self.min_lr for base_lr in self.base_lrs]
28
  elif self.last_epoch < self.num_steps:
29
+ lrs = [self.min_lr * (self.q ** (self.last_epoch - 1)) for base_lr in self.base_lrs]
 
30
  else:
31
+ lrs = [self.max_lr for base_lr in self.base_lrs]
32
 
33
+ return lrs
src/axolotl/utils/trainer.py CHANGED
@@ -86,6 +86,7 @@ def setup_trainer(cfg, train_dataset, eval_dataset, model, tokenizer):
86
  training_args = transformers.TrainingArguments(
87
  per_device_train_batch_size=cfg.micro_batch_size,
88
  gradient_accumulation_steps=cfg.gradient_accumulation_steps,
 
89
  num_train_epochs=cfg.num_epochs,
90
  learning_rate=cfg.learning_rate,
91
  evaluation_strategy="steps" if cfg.val_set_size > 0 else "no",
 
86
  training_args = transformers.TrainingArguments(
87
  per_device_train_batch_size=cfg.micro_batch_size,
88
  gradient_accumulation_steps=cfg.gradient_accumulation_steps,
89
+ eval_accumulation_steps=cfg.gradient_accumulation_steps,
90
  num_train_epochs=cfg.num_epochs,
91
  learning_rate=cfg.learning_rate,
92
  evaluation_strategy="steps" if cfg.val_set_size > 0 else "no",