{ "best_metric": null, "best_model_checkpoint": null, "epoch": 4.998522895125554, "eval_steps": 500, "global_step": 846, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.29542097488921715, "grad_norm": 48.92063522338867, "learning_rate": 2.45e-05, "loss": 352758333.44, "step": 50 }, { "epoch": 0.5908419497784343, "grad_norm": 6.408321380615234, "learning_rate": 4.9500000000000004e-05, "loss": 178610135.04, "step": 100 }, { "epoch": 0.8862629246676514, "grad_norm": 1.5189077854156494, "learning_rate": 4.731947483588622e-05, "loss": 0.4129, "step": 150 }, { "epoch": 1.1816838995568686, "grad_norm": 2.1191959381103516, "learning_rate": 4.458424507658643e-05, "loss": 0.2667, "step": 200 }, { "epoch": 1.4771048744460857, "grad_norm": 4.9505510330200195, "learning_rate": 4.201312910284464e-05, "loss": 27.1689, "step": 250 }, { "epoch": 1.7725258493353029, "grad_norm": 0.197406604886055, "learning_rate": 3.9332603938730855e-05, "loss": 4.521, "step": 300 }, { "epoch": 2.06794682422452, "grad_norm": 0.035883717238903046, "learning_rate": 3.6597374179431074e-05, "loss": 0.0013, "step": 350 }, { "epoch": 2.363367799113737, "grad_norm": 0.02112976275384426, "learning_rate": 3.386214442013129e-05, "loss": 0.0006, "step": 400 }, { "epoch": 2.658788774002954, "grad_norm": 0.005675207823514938, "learning_rate": 3.112691466083151e-05, "loss": 0.0003, "step": 450 }, { "epoch": 2.9542097488921715, "grad_norm": 0.004977445118129253, "learning_rate": 2.839168490153173e-05, "loss": 0.0003, "step": 500 }, { "epoch": 3.2496307237813884, "grad_norm": 0.0056141638197004795, "learning_rate": 2.565645514223195e-05, "loss": 0.0002, "step": 550 }, { "epoch": 3.5450516986706058, "grad_norm": 0.0035467667039483786, "learning_rate": 2.292122538293217e-05, "loss": 0.0002, "step": 600 }, { "epoch": 3.8404726735598227, "grad_norm": 0.0036520687863230705, "learning_rate": 2.0185995623632387e-05, "loss": 0.0001, "step": 650 }, { "epoch": 4.13589364844904, "grad_norm": 0.002137696836143732, "learning_rate": 1.7450765864332606e-05, "loss": 0.0001, "step": 700 }, { "epoch": 4.431314623338257, "grad_norm": 0.0029208394698798656, "learning_rate": 1.4715536105032822e-05, "loss": 0.0001, "step": 750 }, { "epoch": 4.726735598227474, "grad_norm": 0.0022111597936600447, "learning_rate": 1.1980306345733041e-05, "loss": 0.0001, "step": 800 } ], "logging_steps": 50, "max_steps": 1014, "num_input_tokens_seen": 0, "num_train_epochs": 6, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.336633744490496e+17, "train_batch_size": 2, "trial_name": null, "trial_params": null }