pharaoh / trainer_state.json
AlekseyKorshuk's picture
huggingartists
d255b1a
{
"best_metric": 1.6883338689804077,
"best_model_checkpoint": "output/pharaoh/checkpoint-174",
"epoch": 1.0,
"global_step": 174,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.03,
"learning_rate": 0.00013689676402972123,
"loss": 1.6111,
"step": 5
},
{
"epoch": 0.06,
"learning_rate": 0.0001359897369367762,
"loss": 1.6407,
"step": 10
},
{
"epoch": 0.09,
"learning_rate": 0.00013448693747453513,
"loss": 1.7296,
"step": 15
},
{
"epoch": 0.12,
"learning_rate": 0.0001324016514404635,
"loss": 1.6351,
"step": 20
},
{
"epoch": 0.15,
"learning_rate": 0.00012975231422038792,
"loss": 1.6431,
"step": 25
},
{
"epoch": 0.18,
"learning_rate": 0.0001265623478068018,
"loss": 1.5934,
"step": 30
},
{
"epoch": 0.21,
"learning_rate": 0.00012285995373208355,
"loss": 1.6383,
"step": 35
},
{
"epoch": 0.24,
"learning_rate": 0.00011867786374724195,
"loss": 1.688,
"step": 40
},
{
"epoch": 0.27,
"learning_rate": 0.00011405305045036307,
"loss": 1.7355,
"step": 45
},
{
"epoch": 0.3,
"learning_rate": 0.00010902640042300463,
"loss": 1.6672,
"step": 50
},
{
"epoch": 0.33,
"learning_rate": 0.00010364235276424244,
"loss": 1.6835,
"step": 55
},
{
"epoch": 0.36,
"learning_rate": 9.794850621797833e-05,
"loss": 1.5821,
"step": 60
},
{
"epoch": 0.39,
"learning_rate": 9.199519836678021e-05,
"loss": 1.701,
"step": 65
},
{
"epoch": 0.42,
"learning_rate": 8.583506061247361e-05,
"loss": 1.6752,
"step": 70
},
{
"epoch": 0.45,
"learning_rate": 7.95225528777695e-05,
"loss": 1.6845,
"step": 75
},
{
"epoch": 0.48,
"learning_rate": 7.311348214248791e-05,
"loss": 1.6935,
"step": 80
},
{
"epoch": 0.51,
"learning_rate": 6.666450907085589e-05,
"loss": 1.7025,
"step": 85
},
{
"epoch": 0.54,
"learning_rate": 6.023264709163982e-05,
"loss": 1.6483,
"step": 90
},
{
"epoch": 0.57,
"learning_rate": 5.387475835959079e-05,
"loss": 1.5692,
"step": 95
},
{
"epoch": 0.6,
"learning_rate": 4.7647051054259744e-05,
"loss": 1.6434,
"step": 100
},
{
"epoch": 0.63,
"learning_rate": 4.1604582460408433e-05,
"loss": 1.661,
"step": 105
},
{
"epoch": 0.66,
"learning_rate": 3.580077222313727e-05,
"loss": 1.6785,
"step": 110
},
{
"epoch": 0.69,
"learning_rate": 3.0286930080889863e-05,
"loss": 1.6299,
"step": 115
},
{
"epoch": 0.72,
"learning_rate": 2.511180225150775e-05,
"loss": 1.6296,
"step": 120
},
{
"epoch": 0.75,
"learning_rate": 2.0321140481598987e-05,
"loss": 1.6131,
"step": 125
},
{
"epoch": 0.78,
"learning_rate": 1.595729756912515e-05,
"loss": 1.6039,
"step": 130
},
{
"epoch": 0.81,
"learning_rate": 1.2058852935068877e-05,
"loss": 1.696,
"step": 135
},
{
"epoch": 0.84,
"learning_rate": 8.660271554391817e-06,
"loss": 1.6846,
"step": 140
},
{
"epoch": 0.87,
"learning_rate": 5.791599261570497e-06,
"loss": 1.6253,
"step": 145
},
{
"epoch": 0.9,
"learning_rate": 3.4781971244218827e-06,
"loss": 1.5759,
"step": 150
},
{
"epoch": 0.93,
"learning_rate": 1.7405172345409156e-06,
"loss": 1.628,
"step": 155
},
{
"epoch": 0.96,
"learning_rate": 5.939218965176846e-07,
"loss": 1.6359,
"step": 160
},
{
"epoch": 0.99,
"learning_rate": 4.8547814429227996e-08,
"loss": 1.614,
"step": 165
},
{
"epoch": 1.0,
"eval_loss": 1.7224986553192139,
"eval_runtime": 22.2232,
"eval_samples_per_second": 22.454,
"eval_steps_per_second": 2.835,
"step": 167
},
{
"epoch": 0.98,
"learning_rate": 3.5909924568597513e-06,
"loss": 1.4675,
"step": 170
},
{
"epoch": 1.0,
"eval_loss": 1.6883338689804077,
"eval_runtime": 18.8954,
"eval_samples_per_second": 23.498,
"eval_steps_per_second": 2.964,
"step": 174
}
],
"max_steps": 348,
"num_train_epochs": 2,
"total_flos": 181597962240000.0,
"trial_name": null,
"trial_params": null
}