{ "best_metric": 0.30522507429122925, "best_model_checkpoint": "finetuned-ai-real/checkpoint-25", "epoch": 4.0, "eval_steps": 25, "global_step": 176, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.22727272727272727, "grad_norm": 7.896988868713379, "learning_rate": 0.00019090909090909092, "loss": 0.5879, "step": 10 }, { "epoch": 0.45454545454545453, "grad_norm": 9.905610084533691, "learning_rate": 0.00017954545454545456, "loss": 0.5817, "step": 20 }, { "epoch": 0.5681818181818182, "eval_accuracy": 0.8911290322580645, "eval_loss": 0.30522507429122925, "eval_runtime": 4.9056, "eval_samples_per_second": 50.555, "eval_steps_per_second": 6.319, "step": 25 }, { "epoch": 0.6818181818181818, "grad_norm": 7.3549628257751465, "learning_rate": 0.0001681818181818182, "loss": 0.3464, "step": 30 }, { "epoch": 0.9090909090909091, "grad_norm": 2.119649887084961, "learning_rate": 0.00015681818181818182, "loss": 0.2422, "step": 40 }, { "epoch": 1.1363636363636362, "grad_norm": 10.291028022766113, "learning_rate": 0.00014545454545454546, "loss": 0.2006, "step": 50 }, { "epoch": 1.1363636363636362, "eval_accuracy": 0.8911290322580645, "eval_loss": 0.3756392002105713, "eval_runtime": 4.8724, "eval_samples_per_second": 50.899, "eval_steps_per_second": 6.362, "step": 50 }, { "epoch": 1.3636363636363638, "grad_norm": 1.0470197200775146, "learning_rate": 0.0001340909090909091, "loss": 0.2003, "step": 60 }, { "epoch": 1.5909090909090908, "grad_norm": 2.770779848098755, "learning_rate": 0.00012272727272727272, "loss": 0.2175, "step": 70 }, { "epoch": 1.7045454545454546, "eval_accuracy": 0.8830645161290323, "eval_loss": 0.3168693780899048, "eval_runtime": 4.5227, "eval_samples_per_second": 54.834, "eval_steps_per_second": 6.854, "step": 75 }, { "epoch": 1.8181818181818183, "grad_norm": 6.933122158050537, "learning_rate": 0.00011136363636363636, "loss": 0.1467, "step": 80 }, { "epoch": 2.0454545454545454, "grad_norm": 2.787731885910034, "learning_rate": 0.0001, "loss": 0.1328, "step": 90 }, { "epoch": 2.2727272727272725, "grad_norm": 2.367295980453491, "learning_rate": 8.863636363636364e-05, "loss": 0.07, "step": 100 }, { "epoch": 2.2727272727272725, "eval_accuracy": 0.9112903225806451, "eval_loss": 0.363909512758255, "eval_runtime": 5.2997, "eval_samples_per_second": 46.795, "eval_steps_per_second": 5.849, "step": 100 }, { "epoch": 2.5, "grad_norm": 2.1476662158966064, "learning_rate": 7.727272727272727e-05, "loss": 0.1031, "step": 110 }, { "epoch": 2.7272727272727275, "grad_norm": 1.065962553024292, "learning_rate": 6.59090909090909e-05, "loss": 0.0568, "step": 120 }, { "epoch": 2.840909090909091, "eval_accuracy": 0.9233870967741935, "eval_loss": 0.34076112508773804, "eval_runtime": 4.5072, "eval_samples_per_second": 55.023, "eval_steps_per_second": 6.878, "step": 125 }, { "epoch": 2.9545454545454546, "grad_norm": 10.271200180053711, "learning_rate": 5.4545454545454546e-05, "loss": 0.1321, "step": 130 }, { "epoch": 3.1818181818181817, "grad_norm": 1.2485915422439575, "learning_rate": 4.318181818181819e-05, "loss": 0.0432, "step": 140 }, { "epoch": 3.409090909090909, "grad_norm": 6.485482215881348, "learning_rate": 3.181818181818182e-05, "loss": 0.0793, "step": 150 }, { "epoch": 3.409090909090909, "eval_accuracy": 0.907258064516129, "eval_loss": 0.3802954852581024, "eval_runtime": 5.5626, "eval_samples_per_second": 44.583, "eval_steps_per_second": 5.573, "step": 150 }, { "epoch": 3.6363636363636362, "grad_norm": 0.10474708676338196, "learning_rate": 2.0454545454545457e-05, "loss": 0.0421, "step": 160 }, { "epoch": 3.8636363636363638, "grad_norm": 0.7910298109054565, "learning_rate": 9.090909090909091e-06, "loss": 0.0292, "step": 170 }, { "epoch": 3.9772727272727275, "eval_accuracy": 0.9193548387096774, "eval_loss": 0.3216198682785034, "eval_runtime": 4.5692, "eval_samples_per_second": 54.277, "eval_steps_per_second": 6.785, "step": 175 }, { "epoch": 4.0, "step": 176, "total_flos": 1.0966148095175885e+17, "train_loss": 0.1836181137372147, "train_runtime": 150.901, "train_samples_per_second": 37.137, "train_steps_per_second": 1.166 } ], "logging_steps": 10, "max_steps": 176, "num_input_tokens_seen": 0, "num_train_epochs": 4, "save_steps": 25, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 1.0966148095175885e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }