{ "best_metric": null, "best_model_checkpoint": null, "epoch": 2.9957446808510637, "eval_steps": 500, "global_step": 264, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.11347517730496454, "grad_norm": 2.358112635127941, "learning_rate": 5e-06, "loss": 1.045, "step": 10 }, { "epoch": 0.22695035460992907, "grad_norm": 2.2025871190711044, "learning_rate": 5e-06, "loss": 0.939, "step": 20 }, { "epoch": 0.3404255319148936, "grad_norm": 1.183824809408691, "learning_rate": 5e-06, "loss": 0.8991, "step": 30 }, { "epoch": 0.45390070921985815, "grad_norm": 1.2551021476147783, "learning_rate": 5e-06, "loss": 0.8706, "step": 40 }, { "epoch": 0.5673758865248227, "grad_norm": 1.6978646589888085, "learning_rate": 5e-06, "loss": 0.8565, "step": 50 }, { "epoch": 0.6808510638297872, "grad_norm": 2.13778699873673, "learning_rate": 5e-06, "loss": 0.8386, "step": 60 }, { "epoch": 0.7943262411347518, "grad_norm": 0.993295806091264, "learning_rate": 5e-06, "loss": 0.8309, "step": 70 }, { "epoch": 0.9078014184397163, "grad_norm": 1.1398762974593635, "learning_rate": 5e-06, "loss": 0.823, "step": 80 }, { "epoch": 0.9985815602836879, "eval_loss": 0.8090236783027649, "eval_runtime": 63.3465, "eval_samples_per_second": 37.445, "eval_steps_per_second": 0.6, "step": 88 }, { "epoch": 1.0212765957446808, "grad_norm": 1.013500095467207, "learning_rate": 5e-06, "loss": 0.8742, "step": 90 }, { "epoch": 1.1347517730496455, "grad_norm": 1.056225323349834, "learning_rate": 5e-06, "loss": 0.7667, "step": 100 }, { "epoch": 1.24822695035461, "grad_norm": 0.7290196034792423, "learning_rate": 5e-06, "loss": 0.755, "step": 110 }, { "epoch": 1.3617021276595744, "grad_norm": 0.8838498260846974, "learning_rate": 5e-06, "loss": 0.7554, "step": 120 }, { "epoch": 1.475177304964539, "grad_norm": 0.821991213787815, "learning_rate": 5e-06, "loss": 0.7556, "step": 130 }, { "epoch": 1.5886524822695036, "grad_norm": 0.9855152726966359, "learning_rate": 5e-06, "loss": 0.7493, "step": 140 }, { "epoch": 1.702127659574468, "grad_norm": 0.6490086567527167, "learning_rate": 5e-06, "loss": 0.7477, "step": 150 }, { "epoch": 1.8156028368794326, "grad_norm": 1.0694149262660388, "learning_rate": 5e-06, "loss": 0.7414, "step": 160 }, { "epoch": 1.9290780141843973, "grad_norm": 0.9645011140855406, "learning_rate": 5e-06, "loss": 0.7481, "step": 170 }, { "epoch": 1.9971631205673759, "eval_loss": 0.7898643016815186, "eval_runtime": 62.2492, "eval_samples_per_second": 38.105, "eval_steps_per_second": 0.61, "step": 176 }, { "epoch": 2.0425531914893615, "grad_norm": 1.581859190270789, "learning_rate": 5e-06, "loss": 0.7818, "step": 180 }, { "epoch": 2.1560283687943262, "grad_norm": 1.0466470957786433, "learning_rate": 5e-06, "loss": 0.6863, "step": 190 }, { "epoch": 2.269503546099291, "grad_norm": 0.9663026123669691, "learning_rate": 5e-06, "loss": 0.6798, "step": 200 }, { "epoch": 2.382978723404255, "grad_norm": 0.8243226264574698, "learning_rate": 5e-06, "loss": 0.6826, "step": 210 }, { "epoch": 2.49645390070922, "grad_norm": 1.0907354136557872, "learning_rate": 5e-06, "loss": 0.6839, "step": 220 }, { "epoch": 2.6099290780141846, "grad_norm": 0.7996806357479502, "learning_rate": 5e-06, "loss": 0.687, "step": 230 }, { "epoch": 2.723404255319149, "grad_norm": 0.9108831837931511, "learning_rate": 5e-06, "loss": 0.6902, "step": 240 }, { "epoch": 2.8368794326241136, "grad_norm": 0.8473372600949097, "learning_rate": 5e-06, "loss": 0.6873, "step": 250 }, { "epoch": 2.950354609929078, "grad_norm": 0.8244777156304377, "learning_rate": 5e-06, "loss": 0.6866, "step": 260 }, { "epoch": 2.9957446808510637, "eval_loss": 0.7845782041549683, "eval_runtime": 58.8988, "eval_samples_per_second": 40.272, "eval_steps_per_second": 0.645, "step": 264 }, { "epoch": 2.9957446808510637, "step": 264, "total_flos": 442000453140480.0, "train_loss": 0.7778021304896383, "train_runtime": 8882.1716, "train_samples_per_second": 15.22, "train_steps_per_second": 0.03 } ], "logging_steps": 10, "max_steps": 264, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 442000453140480.0, "train_batch_size": 8, "trial_name": null, "trial_params": null }