{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.05, "global_step": 10000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0, "learning_rate": 0.00010077985004622052, "loss": 0.5399, "step": 500 }, { "epoch": 0.01, "learning_rate": 0.00010077565027123787, "loss": 0.4951, "step": 1000 }, { "epoch": 0.01, "learning_rate": 0.00010076865093411392, "loss": 0.487, "step": 1500 }, { "epoch": 0.01, "learning_rate": 0.00010075885246660077, "loss": 0.4806, "step": 2000 }, { "epoch": 0.01, "learning_rate": 0.00010074625547311406, "loss": 0.4778, "step": 2500 }, { "epoch": 0.01, "learning_rate": 0.00010073086073069567, "loss": 0.4747, "step": 3000 }, { "epoch": 0.02, "learning_rate": 0.00010071266918896582, "loss": 0.4732, "step": 3500 }, { "epoch": 0.02, "learning_rate": 0.0001006916819700645, "loss": 0.4724, "step": 4000 }, { "epoch": 0.02, "learning_rate": 0.00010066790036858225, "loss": 0.4708, "step": 4500 }, { "epoch": 0.03, "learning_rate": 0.00010064132585148025, "loss": 0.4677, "step": 5000 }, { "epoch": 0.03, "eval_loss": 0.4321456551551819, "eval_runtime": 334.7353, "eval_samples_per_second": 128.46, "eval_steps_per_second": 2.008, "step": 5000 }, { "epoch": 0.03, "learning_rate": 0.0001006119600579999, "loss": 0.4673, "step": 5500 }, { "epoch": 0.03, "learning_rate": 0.00010057980479956167, "loss": 0.4649, "step": 6000 }, { "epoch": 0.03, "learning_rate": 0.00010054486205965335, "loss": 0.465, "step": 6500 }, { "epoch": 0.04, "learning_rate": 0.00010050713399370776, "loss": 0.4644, "step": 7000 }, { "epoch": 0.04, "learning_rate": 0.00010046662292896969, "loss": 0.4639, "step": 7500 }, { "epoch": 0.04, "learning_rate": 0.00010042342072067417, "loss": 0.4624, "step": 8000 }, { "epoch": 0.04, "learning_rate": 0.00010037735687948529, "loss": 0.463, "step": 8500 }, { "epoch": 0.04, "learning_rate": 0.00010032851804476767, "loss": 0.4606, "step": 9000 }, { "epoch": 0.05, "learning_rate": 0.00010027690722913066, "loss": 0.4595, "step": 9500 }, { "epoch": 0.05, "learning_rate": 0.00010022263913645699, "loss": 0.4605, "step": 10000 }, { "epoch": 0.05, "eval_loss": 0.42437314987182617, "eval_runtime": 366.557, "eval_samples_per_second": 117.308, "eval_steps_per_second": 1.833, "step": 10000 } ], "max_steps": 200000, "num_train_epochs": 9223372036854775807, "total_flos": 2.354930673647616e+20, "trial_name": null, "trial_params": null }