{ "best_metric": null, "best_model_checkpoint": null, "epoch": 12.0, "global_step": 5028, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_bleu": 15.5532, "eval_gen_len": 262.7814, "eval_loss": 2.603503942489624, "eval_runtime": 1071.2105, "eval_samples_per_second": 1.563, "eval_steps_per_second": 0.196, "step": 419 }, { "epoch": 2.0, "eval_bleu": 15.8442, "eval_gen_len": 249.9671, "eval_loss": 2.534031629562378, "eval_runtime": 974.071, "eval_samples_per_second": 1.719, "eval_steps_per_second": 0.216, "step": 838 }, { "epoch": 3.0, "eval_bleu": 15.4747, "eval_gen_len": 274.1971, "eval_loss": 2.492140293121338, "eval_runtime": 1054.8804, "eval_samples_per_second": 1.587, "eval_steps_per_second": 0.199, "step": 1257 }, { "epoch": 4.0, "eval_bleu": 16.1199, "eval_gen_len": 258.5263, "eval_loss": 2.5081911087036133, "eval_runtime": 967.203, "eval_samples_per_second": 1.731, "eval_steps_per_second": 0.217, "step": 1676 }, { "epoch": 5.0, "eval_bleu": 16.2089, "eval_gen_len": 260.6117, "eval_loss": 2.512809991836548, "eval_runtime": 986.3606, "eval_samples_per_second": 1.697, "eval_steps_per_second": 0.213, "step": 2095 }, { "epoch": 6.0, "eval_bleu": 16.0677, "eval_gen_len": 256.6434, "eval_loss": 2.532682180404663, "eval_runtime": 972.4474, "eval_samples_per_second": 1.721, "eval_steps_per_second": 0.216, "step": 2514 }, { "epoch": 7.0, "eval_bleu": 15.9211, "eval_gen_len": 254.4606, "eval_loss": 2.5517630577087402, "eval_runtime": 947.2841, "eval_samples_per_second": 1.767, "eval_steps_per_second": 0.222, "step": 2933 }, { "epoch": 8.0, "eval_bleu": 16.022, "eval_gen_len": 257.5149, "eval_loss": 2.563725709915161, "eval_runtime": 962.17, "eval_samples_per_second": 1.74, "eval_steps_per_second": 0.218, "step": 3352 }, { "epoch": 9.0, "eval_bleu": 16.0511, "eval_gen_len": 258.0084, "eval_loss": 2.5875699520111084, "eval_runtime": 962.6535, "eval_samples_per_second": 1.739, "eval_steps_per_second": 0.218, "step": 3771 }, { "epoch": 10.0, "eval_bleu": 16.0276, "eval_gen_len": 257.9409, "eval_loss": 2.589696168899536, "eval_runtime": 968.2157, "eval_samples_per_second": 1.729, "eval_steps_per_second": 0.217, "step": 4190 }, { "epoch": 11.0, "eval_bleu": 15.9876, "eval_gen_len": 258.5191, "eval_loss": 2.600374221801758, "eval_runtime": 972.9329, "eval_samples_per_second": 1.721, "eval_steps_per_second": 0.216, "step": 4609 }, { "epoch": 11.93, "learning_rate": 2.7844073190135243e-07, "loss": 0.8761, "step": 5000 }, { "epoch": 12.0, "eval_bleu": 16.0242, "eval_gen_len": 259.6051, "eval_loss": 2.603928565979004, "eval_runtime": 980.0259, "eval_samples_per_second": 1.708, "eval_steps_per_second": 0.214, "step": 5028 }, { "epoch": 12.0, "step": 5028, "total_flos": 5449229958905856.0, "train_loss": 0.873852771146011, "train_runtime": 13341.3267, "train_samples_per_second": 3.012, "train_steps_per_second": 0.377 } ], "max_steps": 5028, "num_train_epochs": 12, "total_flos": 5449229958905856.0, "trial_name": null, "trial_params": null }