{ "best_metric": null, "best_model_checkpoint": null, "epoch": 3.0, "eval_steps": 3000, "global_step": 44343, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 6.76544212164265e-05, "grad_norm": 7.09375, "learning_rate": 0.0002999932345578783, "loss": 4.25, "step": 1 }, { "epoch": 0.20296326364927947, "grad_norm": 0.88671875, "learning_rate": 0.000279703673635072, "loss": 2.7986, "step": 3000 }, { "epoch": 0.20296326364927947, "eval_loss": 2.3799755573272705, "eval_runtime": 61.2814, "eval_samples_per_second": 1540.516, "eval_steps_per_second": 6.021, "step": 3000 }, { "epoch": 0.40592652729855894, "grad_norm": 0.77734375, "learning_rate": 0.0002594073472701441, "loss": 2.7128, "step": 6000 }, { "epoch": 0.40592652729855894, "eval_loss": 2.3596713542938232, "eval_runtime": 61.2743, "eval_samples_per_second": 1540.694, "eval_steps_per_second": 6.022, "step": 6000 }, { "epoch": 0.6088897909478385, "grad_norm": 1.0078125, "learning_rate": 0.0002391110209052161, "loss": 2.691, "step": 9000 }, { "epoch": 0.6088897909478385, "eval_loss": 2.348450183868408, "eval_runtime": 61.2066, "eval_samples_per_second": 1542.4, "eval_steps_per_second": 6.029, "step": 9000 }, { "epoch": 0.8118530545971179, "grad_norm": 0.9140625, "learning_rate": 0.0002188146945402882, "loss": 2.6832, "step": 12000 }, { "epoch": 0.8118530545971179, "eval_loss": 2.3406801223754883, "eval_runtime": 61.3132, "eval_samples_per_second": 1539.719, "eval_steps_per_second": 6.018, "step": 12000 }, { "epoch": 1.0148163182463974, "grad_norm": 0.8671875, "learning_rate": 0.00019851836817536025, "loss": 2.6763, "step": 15000 }, { "epoch": 1.0148163182463974, "eval_loss": 2.3388381004333496, "eval_runtime": 61.2522, "eval_samples_per_second": 1541.25, "eval_steps_per_second": 6.024, "step": 15000 }, { "epoch": 1.217779581895677, "grad_norm": 0.8359375, "learning_rate": 0.0001782220418104323, "loss": 2.6714, "step": 18000 }, { "epoch": 1.217779581895677, "eval_loss": 2.3343918323516846, "eval_runtime": 61.2808, "eval_samples_per_second": 1540.532, "eval_steps_per_second": 6.021, "step": 18000 }, { "epoch": 1.4207428455449564, "grad_norm": 1.1796875, "learning_rate": 0.00015792571544550436, "loss": 2.6693, "step": 21000 }, { "epoch": 1.4207428455449564, "eval_loss": 2.333566188812256, "eval_runtime": 61.2712, "eval_samples_per_second": 1540.773, "eval_steps_per_second": 6.022, "step": 21000 }, { "epoch": 1.6237061091942357, "grad_norm": 0.8359375, "learning_rate": 0.0001376293890805764, "loss": 2.6672, "step": 24000 }, { "epoch": 1.6237061091942357, "eval_loss": 2.3340320587158203, "eval_runtime": 61.1843, "eval_samples_per_second": 1542.961, "eval_steps_per_second": 6.031, "step": 24000 }, { "epoch": 1.8266693728435153, "grad_norm": 0.93359375, "learning_rate": 0.00011733306271564845, "loss": 2.6654, "step": 27000 }, { "epoch": 1.8266693728435153, "eval_loss": 2.3319995403289795, "eval_runtime": 61.2774, "eval_samples_per_second": 1540.618, "eval_steps_per_second": 6.022, "step": 27000 }, { "epoch": 2.029632636492795, "grad_norm": 0.83984375, "learning_rate": 9.703673635072052e-05, "loss": 2.6655, "step": 30000 }, { "epoch": 2.029632636492795, "eval_loss": 2.3317666053771973, "eval_runtime": 61.4137, "eval_samples_per_second": 1537.199, "eval_steps_per_second": 6.008, "step": 30000 }, { "epoch": 2.232595900142074, "grad_norm": 0.88671875, "learning_rate": 7.674040998579256e-05, "loss": 2.6628, "step": 33000 }, { "epoch": 2.232595900142074, "eval_loss": 2.3321053981781006, "eval_runtime": 61.2773, "eval_samples_per_second": 1540.618, "eval_steps_per_second": 6.022, "step": 33000 }, { "epoch": 2.435559163791354, "grad_norm": 0.765625, "learning_rate": 5.644408362086462e-05, "loss": 2.666, "step": 36000 }, { "epoch": 2.435559163791354, "eval_loss": 2.3318090438842773, "eval_runtime": 61.27, "eval_samples_per_second": 1540.804, "eval_steps_per_second": 6.023, "step": 36000 }, { "epoch": 2.638522427440633, "grad_norm": 0.80078125, "learning_rate": 3.614775725593667e-05, "loss": 2.6666, "step": 39000 }, { "epoch": 2.638522427440633, "eval_loss": 2.331491470336914, "eval_runtime": 61.2796, "eval_samples_per_second": 1540.562, "eval_steps_per_second": 6.022, "step": 39000 }, { "epoch": 2.841485691089913, "grad_norm": 0.8671875, "learning_rate": 1.5851430891008727e-05, "loss": 2.665, "step": 42000 }, { "epoch": 2.841485691089913, "eval_loss": 2.331724166870117, "eval_runtime": 61.1466, "eval_samples_per_second": 1543.912, "eval_steps_per_second": 6.035, "step": 42000 }, { "epoch": 3.0, "step": 44343, "total_flos": 4.6159075948098355e+17, "train_loss": 2.682002373542611, "train_runtime": 22758.1142, "train_samples_per_second": 498.788, "train_steps_per_second": 1.948 } ], "logging_steps": 3000, "max_steps": 44343, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 3000, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 4.6159075948098355e+17, "train_batch_size": 256, "trial_name": null, "trial_params": null }