|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.0, |
|
"eval_steps": 397, |
|
"global_step": 2384, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.17, |
|
"eval_gen_len": 81.6102, |
|
"eval_loss": 1.3396695852279663, |
|
"eval_rouge1": 52.6908, |
|
"eval_rouge2": 34.3367, |
|
"eval_rougeL": 43.9351, |
|
"eval_rougeLsum": 44.0153, |
|
"eval_runtime": 41.4692, |
|
"eval_samples_per_second": 1.423, |
|
"eval_steps_per_second": 0.723, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 0.21, |
|
"grad_norm": 6.220447063446045, |
|
"learning_rate": 1.895763422818792e-05, |
|
"loss": 0.719, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"eval_gen_len": 84.2203, |
|
"eval_loss": 1.363059639930725, |
|
"eval_rouge1": 54.543, |
|
"eval_rouge2": 36.4199, |
|
"eval_rougeL": 45.8273, |
|
"eval_rougeLsum": 45.7925, |
|
"eval_runtime": 42.2281, |
|
"eval_samples_per_second": 1.397, |
|
"eval_steps_per_second": 0.71, |
|
"step": 794 |
|
}, |
|
{ |
|
"epoch": 0.42, |
|
"grad_norm": 8.490495681762695, |
|
"learning_rate": 1.7908976510067115e-05, |
|
"loss": 0.7459, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"eval_gen_len": 85.2373, |
|
"eval_loss": 1.3582559823989868, |
|
"eval_rouge1": 53.2262, |
|
"eval_rouge2": 34.8889, |
|
"eval_rougeL": 44.1043, |
|
"eval_rougeLsum": 44.0998, |
|
"eval_runtime": 42.6328, |
|
"eval_samples_per_second": 1.384, |
|
"eval_steps_per_second": 0.704, |
|
"step": 1191 |
|
}, |
|
{ |
|
"epoch": 0.63, |
|
"grad_norm": 6.794929027557373, |
|
"learning_rate": 1.686031879194631e-05, |
|
"loss": 0.7154, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.67, |
|
"eval_gen_len": 83.7797, |
|
"eval_loss": 1.3886514902114868, |
|
"eval_rouge1": 54.9928, |
|
"eval_rouge2": 37.1125, |
|
"eval_rougeL": 46.4105, |
|
"eval_rougeLsum": 46.4044, |
|
"eval_runtime": 42.3464, |
|
"eval_samples_per_second": 1.393, |
|
"eval_steps_per_second": 0.708, |
|
"step": 1588 |
|
}, |
|
{ |
|
"epoch": 0.83, |
|
"eval_gen_len": 85.8814, |
|
"eval_loss": 1.3405011892318726, |
|
"eval_rouge1": 52.5543, |
|
"eval_rouge2": 33.702, |
|
"eval_rougeL": 42.9428, |
|
"eval_rougeLsum": 43.0015, |
|
"eval_runtime": 43.1199, |
|
"eval_samples_per_second": 1.368, |
|
"eval_steps_per_second": 0.696, |
|
"step": 1985 |
|
}, |
|
{ |
|
"epoch": 0.84, |
|
"grad_norm": 8.691970825195312, |
|
"learning_rate": 1.5811661073825504e-05, |
|
"loss": 0.7507, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"eval_gen_len": 81.7797, |
|
"eval_loss": 1.3399206399917603, |
|
"eval_rouge1": 52.4327, |
|
"eval_rouge2": 34.1158, |
|
"eval_rougeL": 43.2742, |
|
"eval_rougeLsum": 43.1693, |
|
"eval_runtime": 41.935, |
|
"eval_samples_per_second": 1.407, |
|
"eval_steps_per_second": 0.715, |
|
"step": 2382 |
|
} |
|
], |
|
"logging_steps": 500, |
|
"max_steps": 9536, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 1192, |
|
"total_flos": 5.17811143698432e+16, |
|
"train_batch_size": 2, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|