{ "best_metric": null, "best_model_checkpoint": null, "epoch": 1.984, "eval_steps": 500, "global_step": 124, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.08, "grad_norm": 50.95399475097656, "learning_rate": 9.959935885253715e-06, "loss": 5.4476, "num_input_tokens_seen": 82560, "step": 5 }, { "epoch": 0.16, "grad_norm": 406.1771545410156, "learning_rate": 9.840385594331022e-06, "loss": 5.3325, "num_input_tokens_seen": 165120, "step": 10 }, { "epoch": 0.24, "grad_norm": 52.38233947753906, "learning_rate": 9.643264997861312e-06, "loss": 5.0431, "num_input_tokens_seen": 247680, "step": 15 }, { "epoch": 0.32, "grad_norm": 377.5094909667969, "learning_rate": 9.371733080722911e-06, "loss": 4.4316, "num_input_tokens_seen": 330240, "step": 20 }, { "epoch": 0.4, "grad_norm": 42.05713653564453, "learning_rate": 9.030141317270026e-06, "loss": 3.253, "num_input_tokens_seen": 412800, "step": 25 }, { "epoch": 0.48, "grad_norm": 346.2967224121094, "learning_rate": 8.6239639361456e-06, "loss": 3.1498, "num_input_tokens_seen": 495360, "step": 30 }, { "epoch": 0.56, "grad_norm": 161.69073486328125, "learning_rate": 8.15971019223152e-06, "loss": 2.2606, "num_input_tokens_seen": 577920, "step": 35 }, { "epoch": 0.64, "grad_norm": 36.605567932128906, "learning_rate": 7.644820051634813e-06, "loss": 1.9825, "num_input_tokens_seen": 660480, "step": 40 }, { "epoch": 0.72, "grad_norm": 73.60350799560547, "learning_rate": 7.087544961425317e-06, "loss": 1.4754, "num_input_tokens_seen": 743040, "step": 45 }, { "epoch": 0.8, "grad_norm": 158.03152465820312, "learning_rate": 6.496815614866792e-06, "loss": 0.9276, "num_input_tokens_seen": 825600, "step": 50 }, { "epoch": 0.88, "grad_norm": 7.710397720336914, "learning_rate": 5.882098831289044e-06, "loss": 0.3203, "num_input_tokens_seen": 908160, "step": 55 }, { "epoch": 0.96, "grad_norm": 15.087837219238281, "learning_rate": 5.253245844193564e-06, "loss": 0.3668, "num_input_tokens_seen": 990720, "step": 60 }, { "epoch": 1.04, "grad_norm": 5.742522716522217, "learning_rate": 4.62033442887377e-06, "loss": 0.1251, "num_input_tokens_seen": 1073280, "step": 65 }, { "epoch": 1.12, "grad_norm": 3.3580806255340576, "learning_rate": 3.993507399556699e-06, "loss": 0.0937, "num_input_tokens_seen": 1155840, "step": 70 }, { "epoch": 1.2, "grad_norm": 14.523807525634766, "learning_rate": 3.3828100642538097e-06, "loss": 0.1095, "num_input_tokens_seen": 1238400, "step": 75 }, { "epoch": 1.28, "grad_norm": 4.2941999435424805, "learning_rate": 2.7980292422118282e-06, "loss": 0.0431, "num_input_tokens_seen": 1320960, "step": 80 }, { "epoch": 1.3599999999999999, "grad_norm": 6.715703010559082, "learning_rate": 2.2485364238130435e-06, "loss": 0.0373, "num_input_tokens_seen": 1403520, "step": 85 }, { "epoch": 1.44, "grad_norm": 2.378614664077759, "learning_rate": 1.74313758638889e-06, "loss": 0.0501, "num_input_tokens_seen": 1486080, "step": 90 }, { "epoch": 1.52, "grad_norm": 3.5217220783233643, "learning_rate": 1.2899320727454472e-06, "loss": 0.0309, "num_input_tokens_seen": 1568640, "step": 95 }, { "epoch": 1.6, "grad_norm": 6.760895252227783, "learning_rate": 8.961827939636198e-07, "loss": 0.03, "num_input_tokens_seen": 1651200, "step": 100 }, { "epoch": 1.6800000000000002, "grad_norm": 0.4529302716255188, "learning_rate": 5.681998365579594e-07, "loss": 0.4213, "num_input_tokens_seen": 1733760, "step": 105 }, { "epoch": 1.76, "grad_norm": 22.67563247680664, "learning_rate": 3.112393392645985e-07, "loss": 0.0281, "num_input_tokens_seen": 1816320, "step": 110 }, { "epoch": 1.8399999999999999, "grad_norm": 6.4231414794921875, "learning_rate": 1.2941926002306536e-07, "loss": 0.0125, "num_input_tokens_seen": 1898880, "step": 115 }, { "epoch": 1.92, "grad_norm": 0.6063465476036072, "learning_rate": 2.5653383040524228e-08, "loss": 0.0098, "num_input_tokens_seen": 1981440, "step": 120 } ], "logging_steps": 5, "max_steps": 124, "num_input_tokens_seen": 2047488, "num_train_epochs": 2, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.955998176351027e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }