|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 1.394648829431438, |
|
"eval_steps": 200, |
|
"global_step": 236, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0, |
|
"learning_rate": 1e-06, |
|
"loss": 0.8728, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.03, |
|
"learning_rate": 9.999999999999999e-06, |
|
"loss": 0.8297, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.07, |
|
"learning_rate": 1.9999999999999998e-05, |
|
"loss": 0.7256, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.1, |
|
"learning_rate": 3e-05, |
|
"loss": 0.6035, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.13, |
|
"learning_rate": 2.99770621018718e-05, |
|
"loss": 0.4872, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.17, |
|
"learning_rate": 2.990831856044326e-05, |
|
"loss": 0.476, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"learning_rate": 2.979397962002776e-05, |
|
"loss": 0.4522, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.23, |
|
"learning_rate": 2.9634394973287605e-05, |
|
"loss": 0.4198, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.27, |
|
"learning_rate": 2.943005269173876e-05, |
|
"loss": 0.434, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.3, |
|
"learning_rate": 2.9181577733039554e-05, |
|
"loss": 0.4245, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.33, |
|
"learning_rate": 2.8889730029628665e-05, |
|
"loss": 0.4332, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.37, |
|
"learning_rate": 2.8555402164558058e-05, |
|
"loss": 0.418, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.01, |
|
"learning_rate": 2.8179616641629125e-05, |
|
"loss": 0.4121, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.04, |
|
"learning_rate": 2.776352275818093e-05, |
|
"loss": 0.3505, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.07, |
|
"learning_rate": 2.730839309009485e-05, |
|
"loss": 0.3558, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.11, |
|
"learning_rate": 2.6815619599765775e-05, |
|
"loss": 0.3613, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.14, |
|
"learning_rate": 2.628670937894323e-05, |
|
"loss": 0.3646, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.17, |
|
"learning_rate": 2.572328003946244e-05, |
|
"loss": 0.3456, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 1.21, |
|
"learning_rate": 2.512705476596226e-05, |
|
"loss": 0.3435, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 1.24, |
|
"learning_rate": 2.4499857045720705e-05, |
|
"loss": 0.3614, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"learning_rate": 2.3843605091726184e-05, |
|
"loss": 0.3374, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.27, |
|
"eval_loss": 0.5180116295814514, |
|
"eval_runtime": 6.8175, |
|
"eval_samples_per_second": 20.535, |
|
"eval_steps_per_second": 5.134, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 1.31, |
|
"learning_rate": 2.3160305976040984e-05, |
|
"loss": 0.3647, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 1.34, |
|
"learning_rate": 2.2452049491399336e-05, |
|
"loss": 0.3271, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 1.37, |
|
"learning_rate": 2.1721001759813677e-05, |
|
"loss": 0.3519, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 1.39, |
|
"step": 236, |
|
"total_flos": 4.241630717752115e+16, |
|
"train_loss": 0.4328786596908408, |
|
"train_runtime": 504.1662, |
|
"train_samples_per_second": 4.744, |
|
"train_steps_per_second": 1.186 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 598, |
|
"num_train_epochs": 2, |
|
"save_steps": 200, |
|
"total_flos": 4.241630717752115e+16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|