{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 12.038221352795125,
  "eval_steps": 500,
  "global_step": 10000,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.24,
      "learning_rate": 1.2e-05,
      "loss": 1.7974,
      "step": 200
    },
    {
      "epoch": 0.48,
      "learning_rate": 2.4e-05,
      "loss": 1.78,
      "step": 400
    },
    {
      "epoch": 0.72,
      "learning_rate": 2.968421052631579e-05,
      "loss": 1.7735,
      "step": 600
    },
    {
      "epoch": 0.96,
      "learning_rate": 2.905263157894737e-05,
      "loss": 1.7659,
      "step": 800
    },
    {
      "epoch": 1.2,
      "learning_rate": 2.8421052631578946e-05,
      "loss": 1.7449,
      "step": 1000
    },
    {
      "epoch": 1.44,
      "learning_rate": 2.7789473684210526e-05,
      "loss": 1.7377,
      "step": 1200
    },
    {
      "epoch": 1.69,
      "learning_rate": 2.7157894736842106e-05,
      "loss": 1.7293,
      "step": 1400
    },
    {
      "epoch": 1.93,
      "learning_rate": 2.6526315789473685e-05,
      "loss": 1.7259,
      "step": 1600
    },
    {
      "epoch": 2.17,
      "learning_rate": 2.5894736842105265e-05,
      "loss": 1.705,
      "step": 1800
    },
    {
      "epoch": 2.41,
      "learning_rate": 2.526315789473684e-05,
      "loss": 1.6861,
      "step": 2000
    },
    {
      "epoch": 2.65,
      "learning_rate": 2.463157894736842e-05,
      "loss": 1.6913,
      "step": 2200
    },
    {
      "epoch": 2.89,
      "learning_rate": 2.4e-05,
      "loss": 1.6857,
      "step": 2400
    },
    {
      "epoch": 3.13,
      "learning_rate": 2.336842105263158e-05,
      "loss": 1.6676,
      "step": 2600
    },
    {
      "epoch": 3.37,
      "learning_rate": 2.273684210526316e-05,
      "loss": 1.6573,
      "step": 2800
    },
    {
      "epoch": 3.61,
      "learning_rate": 2.2105263157894736e-05,
      "loss": 1.6504,
      "step": 3000
    },
    {
      "epoch": 3.85,
      "learning_rate": 2.1473684210526316e-05,
      "loss": 1.6465,
      "step": 3200
    },
    {
      "epoch": 4.09,
      "learning_rate": 2.0842105263157895e-05,
      "loss": 1.6333,
      "step": 3400
    },
    {
      "epoch": 4.33,
      "learning_rate": 2.0210526315789475e-05,
      "loss": 1.6197,
      "step": 3600
    },
    {
      "epoch": 4.57,
      "learning_rate": 1.9578947368421055e-05,
      "loss": 1.6201,
      "step": 3800
    },
    {
      "epoch": 4.82,
      "learning_rate": 1.894736842105263e-05,
      "loss": 1.6179,
      "step": 4000
    },
    {
      "epoch": 5.06,
      "learning_rate": 1.831578947368421e-05,
      "loss": 1.6134,
      "step": 4200
    },
    {
      "epoch": 5.3,
      "learning_rate": 1.7684210526315787e-05,
      "loss": 1.5945,
      "step": 4400
    },
    {
      "epoch": 5.54,
      "learning_rate": 1.705263157894737e-05,
      "loss": 1.588,
      "step": 4600
    },
    {
      "epoch": 5.78,
      "learning_rate": 1.642105263157895e-05,
      "loss": 1.5949,
      "step": 4800
    },
    {
      "epoch": 6.02,
      "learning_rate": 1.5789473684210526e-05,
      "loss": 1.585,
      "step": 5000
    },
    {
      "epoch": 6.26,
      "learning_rate": 1.5157894736842105e-05,
      "loss": 1.5695,
      "step": 5200
    },
    {
      "epoch": 6.5,
      "learning_rate": 1.4526315789473685e-05,
      "loss": 1.5656,
      "step": 5400
    },
    {
      "epoch": 6.74,
      "learning_rate": 1.3894736842105263e-05,
      "loss": 1.5687,
      "step": 5600
    },
    {
      "epoch": 6.98,
      "learning_rate": 1.3263157894736843e-05,
      "loss": 1.5669,
      "step": 5800
    },
    {
      "epoch": 7.22,
      "learning_rate": 1.263157894736842e-05,
      "loss": 1.5511,
      "step": 6000
    },
    {
      "epoch": 7.46,
      "learning_rate": 1.2e-05,
      "loss": 1.5488,
      "step": 6200
    },
    {
      "epoch": 7.7,
      "learning_rate": 1.136842105263158e-05,
      "loss": 1.5462,
      "step": 6400
    },
    {
      "epoch": 7.95,
      "learning_rate": 1.0736842105263158e-05,
      "loss": 1.5507,
      "step": 6600
    },
    {
      "epoch": 8.19,
      "learning_rate": 1.0105263157894738e-05,
      "loss": 1.5364,
      "step": 6800
    },
    {
      "epoch": 8.43,
      "learning_rate": 9.473684210526315e-06,
      "loss": 1.5352,
      "step": 7000
    },
    {
      "epoch": 8.67,
      "learning_rate": 8.842105263157893e-06,
      "loss": 1.533,
      "step": 7200
    },
    {
      "epoch": 8.91,
      "learning_rate": 8.210526315789475e-06,
      "loss": 1.5292,
      "step": 7400
    },
    {
      "epoch": 9.15,
      "learning_rate": 7.578947368421053e-06,
      "loss": 1.5258,
      "step": 7600
    },
    {
      "epoch": 9.39,
      "learning_rate": 6.9473684210526315e-06,
      "loss": 1.5194,
      "step": 7800
    },
    {
      "epoch": 9.63,
      "learning_rate": 6.31578947368421e-06,
      "loss": 1.5205,
      "step": 8000
    },
    {
      "epoch": 9.87,
      "learning_rate": 5.68421052631579e-06,
      "loss": 1.5163,
      "step": 8200
    },
    {
      "epoch": 10.11,
      "learning_rate": 5.052631578947369e-06,
      "loss": 1.5182,
      "step": 8400
    },
    {
      "epoch": 10.35,
      "learning_rate": 4.421052631578947e-06,
      "loss": 1.5126,
      "step": 8600
    },
    {
      "epoch": 10.59,
      "learning_rate": 3.7894736842105264e-06,
      "loss": 1.5102,
      "step": 8800
    },
    {
      "epoch": 10.83,
      "learning_rate": 3.157894736842105e-06,
      "loss": 1.5073,
      "step": 9000
    },
    {
      "epoch": 11.08,
      "learning_rate": 2.5263157894736844e-06,
      "loss": 1.5104,
      "step": 9200
    },
    {
      "epoch": 11.32,
      "learning_rate": 1.8947368421052632e-06,
      "loss": 1.5033,
      "step": 9400
    },
    {
      "epoch": 11.56,
      "learning_rate": 1.2631578947368422e-06,
      "loss": 1.5055,
      "step": 9600
    },
    {
      "epoch": 11.8,
      "learning_rate": 6.315789473684211e-07,
      "loss": 1.5047,
      "step": 9800
    },
    {
      "epoch": 12.04,
      "learning_rate": 0.0,
      "loss": 1.5057,
      "step": 10000
    }
  ],
  "logging_steps": 200,
  "max_steps": 10000,
  "num_train_epochs": 13,
  "save_steps": 500,
  "total_flos": 1.1107602523509228e+18,
  "trial_name": null,
  "trial_params": null
}