{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.3590033975084936, "eval_steps": 20, "global_step": 300, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.09060022650056625, "grad_norm": 4.824601173400879, "learning_rate": 1.9393939393939395e-05, "loss": 6.3414, "step": 20 }, { "epoch": 0.09060022650056625, "eval_loss": 5.462944984436035, "eval_runtime": 169.7392, "eval_samples_per_second": 2.327, "eval_steps_per_second": 0.583, "step": 20 }, { "epoch": 0.1812004530011325, "grad_norm": 3.5273354053497314, "learning_rate": 1.8585858585858588e-05, "loss": 4.9204, "step": 40 }, { "epoch": 0.1812004530011325, "eval_loss": 4.40663480758667, "eval_runtime": 169.7659, "eval_samples_per_second": 2.327, "eval_steps_per_second": 0.583, "step": 40 }, { "epoch": 0.2718006795016987, "grad_norm": 2.5483546257019043, "learning_rate": 1.7777777777777777e-05, "loss": 4.0241, "step": 60 }, { "epoch": 0.2718006795016987, "eval_loss": 3.6545450687408447, "eval_runtime": 169.7082, "eval_samples_per_second": 2.328, "eval_steps_per_second": 0.583, "step": 60 }, { "epoch": 0.362400906002265, "grad_norm": 1.4762500524520874, "learning_rate": 1.6969696969696972e-05, "loss": 3.4114, "step": 80 }, { "epoch": 0.362400906002265, "eval_loss": 3.1997323036193848, "eval_runtime": 169.459, "eval_samples_per_second": 2.331, "eval_steps_per_second": 0.584, "step": 80 }, { "epoch": 0.45300113250283125, "grad_norm": 1.2465909719467163, "learning_rate": 1.616161616161616e-05, "loss": 3.0527, "step": 100 }, { "epoch": 0.45300113250283125, "eval_loss": 2.9332973957061768, "eval_runtime": 169.4702, "eval_samples_per_second": 2.331, "eval_steps_per_second": 0.584, "step": 100 }, { "epoch": 0.5436013590033975, "grad_norm": 1.2837079763412476, "learning_rate": 1.5353535353535354e-05, "loss": 2.8401, "step": 120 }, { "epoch": 0.5436013590033975, "eval_loss": 2.765261173248291, "eval_runtime": 169.5654, "eval_samples_per_second": 2.329, "eval_steps_per_second": 0.584, "step": 120 }, { "epoch": 0.6342015855039638, "grad_norm": 1.069353699684143, "learning_rate": 1.4545454545454546e-05, "loss": 2.7202, "step": 140 }, { "epoch": 0.6342015855039638, "eval_loss": 2.654095411300659, "eval_runtime": 169.5632, "eval_samples_per_second": 2.33, "eval_steps_per_second": 0.584, "step": 140 }, { "epoch": 0.72480181200453, "grad_norm": 1.0665814876556396, "learning_rate": 1.3737373737373739e-05, "loss": 2.605, "step": 160 }, { "epoch": 0.72480181200453, "eval_loss": 2.576014995574951, "eval_runtime": 169.9075, "eval_samples_per_second": 2.325, "eval_steps_per_second": 0.583, "step": 160 }, { "epoch": 0.8154020385050963, "grad_norm": 1.076709508895874, "learning_rate": 1.2929292929292931e-05, "loss": 2.5533, "step": 180 }, { "epoch": 0.8154020385050963, "eval_loss": 2.519667148590088, "eval_runtime": 169.6071, "eval_samples_per_second": 2.329, "eval_steps_per_second": 0.584, "step": 180 }, { "epoch": 0.9060022650056625, "grad_norm": 1.0686030387878418, "learning_rate": 1.2121212121212122e-05, "loss": 2.5004, "step": 200 }, { "epoch": 0.9060022650056625, "eval_loss": 2.4773340225219727, "eval_runtime": 169.6567, "eval_samples_per_second": 2.328, "eval_steps_per_second": 0.584, "step": 200 }, { "epoch": 0.9966024915062288, "grad_norm": 1.1253015995025635, "learning_rate": 1.1313131313131314e-05, "loss": 2.4613, "step": 220 }, { "epoch": 0.9966024915062288, "eval_loss": 2.444694995880127, "eval_runtime": 169.7657, "eval_samples_per_second": 2.327, "eval_steps_per_second": 0.583, "step": 220 }, { "epoch": 1.087202718006795, "grad_norm": 1.1171083450317383, "learning_rate": 1.0505050505050507e-05, "loss": 2.4456, "step": 240 }, { "epoch": 1.087202718006795, "eval_loss": 2.4184916019439697, "eval_runtime": 169.7027, "eval_samples_per_second": 2.328, "eval_steps_per_second": 0.583, "step": 240 }, { "epoch": 1.1778029445073612, "grad_norm": 1.1789259910583496, "learning_rate": 9.696969696969698e-06, "loss": 2.4151, "step": 260 }, { "epoch": 1.1778029445073612, "eval_loss": 2.397007465362549, "eval_runtime": 169.6356, "eval_samples_per_second": 2.329, "eval_steps_per_second": 0.584, "step": 260 }, { "epoch": 1.2684031710079275, "grad_norm": 1.1507657766342163, "learning_rate": 8.888888888888888e-06, "loss": 2.3943, "step": 280 }, { "epoch": 1.2684031710079275, "eval_loss": 2.3794679641723633, "eval_runtime": 169.6152, "eval_samples_per_second": 2.329, "eval_steps_per_second": 0.584, "step": 280 }, { "epoch": 1.3590033975084936, "grad_norm": 1.1052231788635254, "learning_rate": 8.08080808080808e-06, "loss": 2.3621, "step": 300 }, { "epoch": 1.3590033975084936, "eval_loss": 2.3650312423706055, "eval_runtime": 169.6247, "eval_samples_per_second": 2.329, "eval_steps_per_second": 0.584, "step": 300 } ], "logging_steps": 20, "max_steps": 500, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 20, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.252155720952709e+17, "train_batch_size": 4, "trial_name": null, "trial_params": null }