{ "best_metric": null, "best_model_checkpoint": null, "epoch": 9.997955010224949, "global_step": 610, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 1.0, "eval_loss": 39.944950103759766, "eval_runtime": 1.5442, "eval_samples_per_second": 79.651, "eval_steps_per_second": 10.361, "step": 61 }, { "epoch": 2.0, "eval_loss": 39.573768615722656, "eval_runtime": 1.5441, "eval_samples_per_second": 79.656, "eval_steps_per_second": 10.362, "step": 122 }, { "epoch": 3.0, "eval_loss": 40.006507873535156, "eval_runtime": 1.5451, "eval_samples_per_second": 79.606, "eval_steps_per_second": 10.355, "step": 183 }, { "epoch": 4.0, "eval_loss": 39.271480560302734, "eval_runtime": 1.5464, "eval_samples_per_second": 79.54, "eval_steps_per_second": 10.347, "step": 244 }, { "epoch": 5.0, "eval_loss": 38.749183654785156, "eval_runtime": 1.5468, "eval_samples_per_second": 79.518, "eval_steps_per_second": 10.344, "step": 305 }, { "epoch": 6.0, "eval_loss": 38.856666564941406, "eval_runtime": 1.5467, "eval_samples_per_second": 79.525, "eval_steps_per_second": 10.345, "step": 366 }, { "epoch": 7.0, "eval_loss": 38.75959396362305, "eval_runtime": 1.5455, "eval_samples_per_second": 79.588, "eval_steps_per_second": 10.353, "step": 427 }, { "epoch": 8.0, "eval_loss": 38.665225982666016, "eval_runtime": 1.5457, "eval_samples_per_second": 79.578, "eval_steps_per_second": 10.352, "step": 488 }, { "epoch": 8.2, "learning_rate": 9.01639344262295e-09, "loss": 42.1342, "step": 500 }, { "epoch": 9.0, "eval_loss": 38.66032028198242, "eval_runtime": 1.551, "eval_samples_per_second": 79.304, "eval_steps_per_second": 10.316, "step": 549 }, { "epoch": 10.0, "eval_loss": 38.66507339477539, "eval_runtime": 1.5562, "eval_samples_per_second": 79.038, "eval_steps_per_second": 10.281, "step": 610 }, { "epoch": 10.0, "step": 610, "total_flos": 322666370343936.0, "train_loss": 41.80477074795082, "train_runtime": 454.8629, "train_samples_per_second": 10.75, "train_steps_per_second": 1.341 } ], "max_steps": 610, "num_train_epochs": 10, "total_flos": 322666370343936.0, "trial_name": null, "trial_params": null }