add docs
Browse files
README.md
CHANGED
@@ -422,6 +422,12 @@ log_sweep_max_lr:
|
|
422 |
optimizer:
|
423 |
# specify weight decay
|
424 |
weight_decay:
|
|
|
|
|
|
|
|
|
|
|
|
|
425 |
|
426 |
# whether to use BetterTransformer
|
427 |
flash_optimum:
|
|
|
422 |
optimizer:
|
423 |
# specify weight decay
|
424 |
weight_decay:
|
425 |
+
# AdamW optimizer hyperparameters
|
426 |
+
adam_beta1:
|
427 |
+
adam_beta2:
|
428 |
+
adam_epsilon:
|
429 |
+
# Gradient clipping max norm
|
430 |
+
max_grad_norm:
|
431 |
|
432 |
# whether to use BetterTransformer
|
433 |
flash_optimum:
|