|
{ |
|
"best_metric": 0.30522507429122925, |
|
"best_model_checkpoint": "finetuned-ai-real/checkpoint-25", |
|
"epoch": 4.0, |
|
"eval_steps": 25, |
|
"global_step": 176, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.22727272727272727, |
|
"grad_norm": 7.896988868713379, |
|
"learning_rate": 0.00019090909090909092, |
|
"loss": 0.5879, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.45454545454545453, |
|
"grad_norm": 9.905610084533691, |
|
"learning_rate": 0.00017954545454545456, |
|
"loss": 0.5817, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.5681818181818182, |
|
"eval_accuracy": 0.8911290322580645, |
|
"eval_loss": 0.30522507429122925, |
|
"eval_runtime": 4.9056, |
|
"eval_samples_per_second": 50.555, |
|
"eval_steps_per_second": 6.319, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.6818181818181818, |
|
"grad_norm": 7.3549628257751465, |
|
"learning_rate": 0.0001681818181818182, |
|
"loss": 0.3464, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.9090909090909091, |
|
"grad_norm": 2.119649887084961, |
|
"learning_rate": 0.00015681818181818182, |
|
"loss": 0.2422, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"grad_norm": 10.291028022766113, |
|
"learning_rate": 0.00014545454545454546, |
|
"loss": 0.2006, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.1363636363636362, |
|
"eval_accuracy": 0.8911290322580645, |
|
"eval_loss": 0.3756392002105713, |
|
"eval_runtime": 4.8724, |
|
"eval_samples_per_second": 50.899, |
|
"eval_steps_per_second": 6.362, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 1.3636363636363638, |
|
"grad_norm": 1.0470197200775146, |
|
"learning_rate": 0.0001340909090909091, |
|
"loss": 0.2003, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 1.5909090909090908, |
|
"grad_norm": 2.770779848098755, |
|
"learning_rate": 0.00012272727272727272, |
|
"loss": 0.2175, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 1.7045454545454546, |
|
"eval_accuracy": 0.8830645161290323, |
|
"eval_loss": 0.3168693780899048, |
|
"eval_runtime": 4.5227, |
|
"eval_samples_per_second": 54.834, |
|
"eval_steps_per_second": 6.854, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 1.8181818181818183, |
|
"grad_norm": 6.933122158050537, |
|
"learning_rate": 0.00011136363636363636, |
|
"loss": 0.1467, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 2.0454545454545454, |
|
"grad_norm": 2.787731885910034, |
|
"learning_rate": 0.0001, |
|
"loss": 0.1328, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"grad_norm": 2.367295980453491, |
|
"learning_rate": 8.863636363636364e-05, |
|
"loss": 0.07, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.2727272727272725, |
|
"eval_accuracy": 0.9112903225806451, |
|
"eval_loss": 0.363909512758255, |
|
"eval_runtime": 5.2997, |
|
"eval_samples_per_second": 46.795, |
|
"eval_steps_per_second": 5.849, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 2.5, |
|
"grad_norm": 2.1476662158966064, |
|
"learning_rate": 7.727272727272727e-05, |
|
"loss": 0.1031, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 2.7272727272727275, |
|
"grad_norm": 1.065962553024292, |
|
"learning_rate": 6.59090909090909e-05, |
|
"loss": 0.0568, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 2.840909090909091, |
|
"eval_accuracy": 0.9233870967741935, |
|
"eval_loss": 0.34076112508773804, |
|
"eval_runtime": 4.5072, |
|
"eval_samples_per_second": 55.023, |
|
"eval_steps_per_second": 6.878, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 2.9545454545454546, |
|
"grad_norm": 10.271200180053711, |
|
"learning_rate": 5.4545454545454546e-05, |
|
"loss": 0.1321, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 3.1818181818181817, |
|
"grad_norm": 1.2485915422439575, |
|
"learning_rate": 4.318181818181819e-05, |
|
"loss": 0.0432, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 3.409090909090909, |
|
"grad_norm": 6.485482215881348, |
|
"learning_rate": 3.181818181818182e-05, |
|
"loss": 0.0793, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.409090909090909, |
|
"eval_accuracy": 0.907258064516129, |
|
"eval_loss": 0.3802954852581024, |
|
"eval_runtime": 5.5626, |
|
"eval_samples_per_second": 44.583, |
|
"eval_steps_per_second": 5.573, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 3.6363636363636362, |
|
"grad_norm": 0.10474708676338196, |
|
"learning_rate": 2.0454545454545457e-05, |
|
"loss": 0.0421, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 3.8636363636363638, |
|
"grad_norm": 0.7910298109054565, |
|
"learning_rate": 9.090909090909091e-06, |
|
"loss": 0.0292, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 3.9772727272727275, |
|
"eval_accuracy": 0.9193548387096774, |
|
"eval_loss": 0.3216198682785034, |
|
"eval_runtime": 4.5692, |
|
"eval_samples_per_second": 54.277, |
|
"eval_steps_per_second": 6.785, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"step": 176, |
|
"total_flos": 1.0966148095175885e+17, |
|
"train_loss": 0.1836181137372147, |
|
"train_runtime": 150.901, |
|
"train_samples_per_second": 37.137, |
|
"train_steps_per_second": 1.166 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 176, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 4, |
|
"save_steps": 25, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.0966148095175885e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|