|
{
|
|
"best_metric": 0.8478260869565217,
|
|
"best_model_checkpoint": "BEiT-DMAE-13XDA-REVAL-80-32\\checkpoint-1144",
|
|
"epoch": 79.66804979253112,
|
|
"eval_steps": 500,
|
|
"global_step": 4800,
|
|
"is_hyper_param_search": false,
|
|
"is_local_process_zero": true,
|
|
"is_world_process_zero": true,
|
|
"log_history": [
|
|
{
|
|
"epoch": 0.17,
|
|
"learning_rate": 7.291666666666665e-07,
|
|
"loss": 1.6222,
|
|
"step": 10
|
|
},
|
|
{
|
|
"epoch": 0.33,
|
|
"learning_rate": 1.458333333333333e-06,
|
|
"loss": 1.6488,
|
|
"step": 20
|
|
},
|
|
{
|
|
"epoch": 0.5,
|
|
"learning_rate": 2.1875e-06,
|
|
"loss": 1.611,
|
|
"step": 30
|
|
},
|
|
{
|
|
"epoch": 0.66,
|
|
"learning_rate": 2.916666666666666e-06,
|
|
"loss": 1.6356,
|
|
"step": 40
|
|
},
|
|
{
|
|
"epoch": 0.83,
|
|
"learning_rate": 3.6458333333333333e-06,
|
|
"loss": 1.602,
|
|
"step": 50
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"learning_rate": 4.375e-06,
|
|
"loss": 1.5474,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 1.0,
|
|
"eval_accuracy": 0.45652173913043476,
|
|
"eval_loss": 1.2944936752319336,
|
|
"eval_runtime": 0.6632,
|
|
"eval_samples_per_second": 69.361,
|
|
"eval_steps_per_second": 4.524,
|
|
"step": 60
|
|
},
|
|
{
|
|
"epoch": 1.16,
|
|
"learning_rate": 5.104166666666667e-06,
|
|
"loss": 1.561,
|
|
"step": 70
|
|
},
|
|
{
|
|
"epoch": 1.33,
|
|
"learning_rate": 5.833333333333332e-06,
|
|
"loss": 1.4872,
|
|
"step": 80
|
|
},
|
|
{
|
|
"epoch": 1.49,
|
|
"learning_rate": 6.5624999999999994e-06,
|
|
"loss": 1.458,
|
|
"step": 90
|
|
},
|
|
{
|
|
"epoch": 1.66,
|
|
"learning_rate": 7.291666666666667e-06,
|
|
"loss": 1.4426,
|
|
"step": 100
|
|
},
|
|
{
|
|
"epoch": 1.83,
|
|
"learning_rate": 8.020833333333333e-06,
|
|
"loss": 1.4296,
|
|
"step": 110
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"learning_rate": 8.75e-06,
|
|
"loss": 1.3959,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 1.99,
|
|
"eval_accuracy": 0.45652173913043476,
|
|
"eval_loss": 1.2745169401168823,
|
|
"eval_runtime": 0.6327,
|
|
"eval_samples_per_second": 72.708,
|
|
"eval_steps_per_second": 4.742,
|
|
"step": 120
|
|
},
|
|
{
|
|
"epoch": 2.16,
|
|
"learning_rate": 9.479166666666666e-06,
|
|
"loss": 1.3373,
|
|
"step": 130
|
|
},
|
|
{
|
|
"epoch": 2.32,
|
|
"learning_rate": 1.0208333333333334e-05,
|
|
"loss": 1.2946,
|
|
"step": 140
|
|
},
|
|
{
|
|
"epoch": 2.49,
|
|
"learning_rate": 1.0937499999999998e-05,
|
|
"loss": 1.236,
|
|
"step": 150
|
|
},
|
|
{
|
|
"epoch": 2.66,
|
|
"learning_rate": 1.1666666666666665e-05,
|
|
"loss": 1.1899,
|
|
"step": 160
|
|
},
|
|
{
|
|
"epoch": 2.82,
|
|
"learning_rate": 1.2395833333333333e-05,
|
|
"loss": 1.1186,
|
|
"step": 170
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"learning_rate": 1.3124999999999999e-05,
|
|
"loss": 1.0517,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 2.99,
|
|
"eval_accuracy": 0.6086956521739131,
|
|
"eval_loss": 0.9631962180137634,
|
|
"eval_runtime": 0.6872,
|
|
"eval_samples_per_second": 66.943,
|
|
"eval_steps_per_second": 4.366,
|
|
"step": 180
|
|
},
|
|
{
|
|
"epoch": 3.15,
|
|
"learning_rate": 1.3854166666666665e-05,
|
|
"loss": 0.9828,
|
|
"step": 190
|
|
},
|
|
{
|
|
"epoch": 3.32,
|
|
"learning_rate": 1.4583333333333333e-05,
|
|
"loss": 0.9259,
|
|
"step": 200
|
|
},
|
|
{
|
|
"epoch": 3.49,
|
|
"learning_rate": 1.53125e-05,
|
|
"loss": 0.8746,
|
|
"step": 210
|
|
},
|
|
{
|
|
"epoch": 3.65,
|
|
"learning_rate": 1.6041666666666666e-05,
|
|
"loss": 0.8467,
|
|
"step": 220
|
|
},
|
|
{
|
|
"epoch": 3.82,
|
|
"learning_rate": 1.6770833333333332e-05,
|
|
"loss": 0.7669,
|
|
"step": 230
|
|
},
|
|
{
|
|
"epoch": 3.98,
|
|
"learning_rate": 1.75e-05,
|
|
"loss": 0.7273,
|
|
"step": 240
|
|
},
|
|
{
|
|
"epoch": 4.0,
|
|
"eval_accuracy": 0.6956521739130435,
|
|
"eval_loss": 0.7708569169044495,
|
|
"eval_runtime": 0.6952,
|
|
"eval_samples_per_second": 66.169,
|
|
"eval_steps_per_second": 4.315,
|
|
"step": 241
|
|
},
|
|
{
|
|
"epoch": 4.15,
|
|
"learning_rate": 1.8229166666666665e-05,
|
|
"loss": 0.6937,
|
|
"step": 250
|
|
},
|
|
{
|
|
"epoch": 4.32,
|
|
"learning_rate": 1.895833333333333e-05,
|
|
"loss": 0.6738,
|
|
"step": 260
|
|
},
|
|
{
|
|
"epoch": 4.48,
|
|
"learning_rate": 1.9687499999999997e-05,
|
|
"loss": 0.6577,
|
|
"step": 270
|
|
},
|
|
{
|
|
"epoch": 4.65,
|
|
"learning_rate": 2.0416666666666667e-05,
|
|
"loss": 0.6679,
|
|
"step": 280
|
|
},
|
|
{
|
|
"epoch": 4.81,
|
|
"learning_rate": 2.114583333333333e-05,
|
|
"loss": 0.5962,
|
|
"step": 290
|
|
},
|
|
{
|
|
"epoch": 4.98,
|
|
"learning_rate": 2.1874999999999996e-05,
|
|
"loss": 0.5246,
|
|
"step": 300
|
|
},
|
|
{
|
|
"epoch": 5.0,
|
|
"eval_accuracy": 0.7391304347826086,
|
|
"eval_loss": 0.7217094302177429,
|
|
"eval_runtime": 0.6875,
|
|
"eval_samples_per_second": 66.909,
|
|
"eval_steps_per_second": 4.364,
|
|
"step": 301
|
|
},
|
|
{
|
|
"epoch": 5.15,
|
|
"learning_rate": 2.2604166666666666e-05,
|
|
"loss": 0.4905,
|
|
"step": 310
|
|
},
|
|
{
|
|
"epoch": 5.31,
|
|
"learning_rate": 2.333333333333333e-05,
|
|
"loss": 0.4651,
|
|
"step": 320
|
|
},
|
|
{
|
|
"epoch": 5.48,
|
|
"learning_rate": 2.40625e-05,
|
|
"loss": 0.3947,
|
|
"step": 330
|
|
},
|
|
{
|
|
"epoch": 5.64,
|
|
"learning_rate": 2.4791666666666665e-05,
|
|
"loss": 0.3819,
|
|
"step": 340
|
|
},
|
|
{
|
|
"epoch": 5.81,
|
|
"learning_rate": 2.552083333333333e-05,
|
|
"loss": 0.3977,
|
|
"step": 350
|
|
},
|
|
{
|
|
"epoch": 5.98,
|
|
"learning_rate": 2.6249999999999998e-05,
|
|
"loss": 0.3645,
|
|
"step": 360
|
|
},
|
|
{
|
|
"epoch": 5.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 0.7142456769943237,
|
|
"eval_runtime": 0.6651,
|
|
"eval_samples_per_second": 69.157,
|
|
"eval_steps_per_second": 4.51,
|
|
"step": 361
|
|
},
|
|
{
|
|
"epoch": 6.14,
|
|
"learning_rate": 2.6979166666666664e-05,
|
|
"loss": 0.3382,
|
|
"step": 370
|
|
},
|
|
{
|
|
"epoch": 6.31,
|
|
"learning_rate": 2.770833333333333e-05,
|
|
"loss": 0.2709,
|
|
"step": 380
|
|
},
|
|
{
|
|
"epoch": 6.47,
|
|
"learning_rate": 2.8437499999999997e-05,
|
|
"loss": 0.2641,
|
|
"step": 390
|
|
},
|
|
{
|
|
"epoch": 6.64,
|
|
"learning_rate": 2.9166666666666666e-05,
|
|
"loss": 0.2518,
|
|
"step": 400
|
|
},
|
|
{
|
|
"epoch": 6.8,
|
|
"learning_rate": 2.989583333333333e-05,
|
|
"loss": 0.2631,
|
|
"step": 410
|
|
},
|
|
{
|
|
"epoch": 6.97,
|
|
"learning_rate": 3.0625e-05,
|
|
"loss": 0.2211,
|
|
"step": 420
|
|
},
|
|
{
|
|
"epoch": 6.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 0.6436068415641785,
|
|
"eval_runtime": 0.6647,
|
|
"eval_samples_per_second": 69.209,
|
|
"eval_steps_per_second": 4.514,
|
|
"step": 421
|
|
},
|
|
{
|
|
"epoch": 7.14,
|
|
"learning_rate": 3.1354166666666665e-05,
|
|
"loss": 0.225,
|
|
"step": 430
|
|
},
|
|
{
|
|
"epoch": 7.3,
|
|
"learning_rate": 3.208333333333333e-05,
|
|
"loss": 0.2625,
|
|
"step": 440
|
|
},
|
|
{
|
|
"epoch": 7.47,
|
|
"learning_rate": 3.28125e-05,
|
|
"loss": 0.183,
|
|
"step": 450
|
|
},
|
|
{
|
|
"epoch": 7.63,
|
|
"learning_rate": 3.3541666666666664e-05,
|
|
"loss": 0.1866,
|
|
"step": 460
|
|
},
|
|
{
|
|
"epoch": 7.8,
|
|
"learning_rate": 3.427083333333333e-05,
|
|
"loss": 0.2547,
|
|
"step": 470
|
|
},
|
|
{
|
|
"epoch": 7.97,
|
|
"learning_rate": 3.5e-05,
|
|
"loss": 0.266,
|
|
"step": 480
|
|
},
|
|
{
|
|
"epoch": 8.0,
|
|
"eval_accuracy": 0.6086956521739131,
|
|
"eval_loss": 1.1315726041793823,
|
|
"eval_runtime": 0.6621,
|
|
"eval_samples_per_second": 69.472,
|
|
"eval_steps_per_second": 4.531,
|
|
"step": 482
|
|
},
|
|
{
|
|
"epoch": 8.13,
|
|
"learning_rate": 3.491898148148148e-05,
|
|
"loss": 0.2918,
|
|
"step": 490
|
|
},
|
|
{
|
|
"epoch": 8.3,
|
|
"learning_rate": 3.483796296296296e-05,
|
|
"loss": 0.209,
|
|
"step": 500
|
|
},
|
|
{
|
|
"epoch": 8.46,
|
|
"learning_rate": 3.475694444444444e-05,
|
|
"loss": 0.1858,
|
|
"step": 510
|
|
},
|
|
{
|
|
"epoch": 8.63,
|
|
"learning_rate": 3.467592592592592e-05,
|
|
"loss": 0.1967,
|
|
"step": 520
|
|
},
|
|
{
|
|
"epoch": 8.8,
|
|
"learning_rate": 3.459490740740741e-05,
|
|
"loss": 0.1914,
|
|
"step": 530
|
|
},
|
|
{
|
|
"epoch": 8.96,
|
|
"learning_rate": 3.4513888888888886e-05,
|
|
"loss": 0.1235,
|
|
"step": 540
|
|
},
|
|
{
|
|
"epoch": 9.0,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 0.9256875514984131,
|
|
"eval_runtime": 0.6634,
|
|
"eval_samples_per_second": 69.344,
|
|
"eval_steps_per_second": 4.522,
|
|
"step": 542
|
|
},
|
|
{
|
|
"epoch": 9.13,
|
|
"learning_rate": 3.4432870370370365e-05,
|
|
"loss": 0.2088,
|
|
"step": 550
|
|
},
|
|
{
|
|
"epoch": 9.29,
|
|
"learning_rate": 3.435185185185185e-05,
|
|
"loss": 0.2034,
|
|
"step": 560
|
|
},
|
|
{
|
|
"epoch": 9.46,
|
|
"learning_rate": 3.427083333333333e-05,
|
|
"loss": 0.1289,
|
|
"step": 570
|
|
},
|
|
{
|
|
"epoch": 9.63,
|
|
"learning_rate": 3.418981481481481e-05,
|
|
"loss": 0.162,
|
|
"step": 580
|
|
},
|
|
{
|
|
"epoch": 9.79,
|
|
"learning_rate": 3.4108796296296296e-05,
|
|
"loss": 0.1831,
|
|
"step": 590
|
|
},
|
|
{
|
|
"epoch": 9.96,
|
|
"learning_rate": 3.4027777777777775e-05,
|
|
"loss": 0.1613,
|
|
"step": 600
|
|
},
|
|
{
|
|
"epoch": 9.99,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 0.8526802062988281,
|
|
"eval_runtime": 0.6424,
|
|
"eval_samples_per_second": 71.601,
|
|
"eval_steps_per_second": 4.67,
|
|
"step": 602
|
|
},
|
|
{
|
|
"epoch": 10.12,
|
|
"learning_rate": 3.3946759259259254e-05,
|
|
"loss": 0.0869,
|
|
"step": 610
|
|
},
|
|
{
|
|
"epoch": 10.29,
|
|
"learning_rate": 3.3865740740740734e-05,
|
|
"loss": 0.1814,
|
|
"step": 620
|
|
},
|
|
{
|
|
"epoch": 10.46,
|
|
"learning_rate": 3.378472222222222e-05,
|
|
"loss": 0.1488,
|
|
"step": 630
|
|
},
|
|
{
|
|
"epoch": 10.62,
|
|
"learning_rate": 3.37037037037037e-05,
|
|
"loss": 0.1249,
|
|
"step": 640
|
|
},
|
|
{
|
|
"epoch": 10.79,
|
|
"learning_rate": 3.3622685185185185e-05,
|
|
"loss": 0.1397,
|
|
"step": 650
|
|
},
|
|
{
|
|
"epoch": 10.95,
|
|
"learning_rate": 3.3541666666666664e-05,
|
|
"loss": 0.0946,
|
|
"step": 660
|
|
},
|
|
{
|
|
"epoch": 10.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 0.8274199366569519,
|
|
"eval_runtime": 0.8621,
|
|
"eval_samples_per_second": 53.358,
|
|
"eval_steps_per_second": 3.48,
|
|
"step": 662
|
|
},
|
|
{
|
|
"epoch": 11.12,
|
|
"learning_rate": 3.3460648148148144e-05,
|
|
"loss": 0.1215,
|
|
"step": 670
|
|
},
|
|
{
|
|
"epoch": 11.29,
|
|
"learning_rate": 3.337962962962963e-05,
|
|
"loss": 0.1146,
|
|
"step": 680
|
|
},
|
|
{
|
|
"epoch": 11.45,
|
|
"learning_rate": 3.329861111111111e-05,
|
|
"loss": 0.0991,
|
|
"step": 690
|
|
},
|
|
{
|
|
"epoch": 11.62,
|
|
"learning_rate": 3.321759259259259e-05,
|
|
"loss": 0.1463,
|
|
"step": 700
|
|
},
|
|
{
|
|
"epoch": 11.78,
|
|
"learning_rate": 3.3136574074074074e-05,
|
|
"loss": 0.1049,
|
|
"step": 710
|
|
},
|
|
{
|
|
"epoch": 11.95,
|
|
"learning_rate": 3.3055555555555553e-05,
|
|
"loss": 0.1392,
|
|
"step": 720
|
|
},
|
|
{
|
|
"epoch": 12.0,
|
|
"eval_accuracy": 0.7608695652173914,
|
|
"eval_loss": 0.8311833739280701,
|
|
"eval_runtime": 0.6987,
|
|
"eval_samples_per_second": 65.839,
|
|
"eval_steps_per_second": 4.294,
|
|
"step": 723
|
|
},
|
|
{
|
|
"epoch": 12.12,
|
|
"learning_rate": 3.297453703703703e-05,
|
|
"loss": 0.1116,
|
|
"step": 730
|
|
},
|
|
{
|
|
"epoch": 12.28,
|
|
"learning_rate": 3.289351851851851e-05,
|
|
"loss": 0.1217,
|
|
"step": 740
|
|
},
|
|
{
|
|
"epoch": 12.45,
|
|
"learning_rate": 3.28125e-05,
|
|
"loss": 0.0952,
|
|
"step": 750
|
|
},
|
|
{
|
|
"epoch": 12.61,
|
|
"learning_rate": 3.273148148148148e-05,
|
|
"loss": 0.0972,
|
|
"step": 760
|
|
},
|
|
{
|
|
"epoch": 12.78,
|
|
"learning_rate": 3.2650462962962956e-05,
|
|
"loss": 0.1193,
|
|
"step": 770
|
|
},
|
|
{
|
|
"epoch": 12.95,
|
|
"learning_rate": 3.256944444444444e-05,
|
|
"loss": 0.1028,
|
|
"step": 780
|
|
},
|
|
{
|
|
"epoch": 13.0,
|
|
"eval_accuracy": 0.7608695652173914,
|
|
"eval_loss": 1.1959415674209595,
|
|
"eval_runtime": 0.6697,
|
|
"eval_samples_per_second": 68.691,
|
|
"eval_steps_per_second": 4.48,
|
|
"step": 783
|
|
},
|
|
{
|
|
"epoch": 13.11,
|
|
"learning_rate": 3.248842592592592e-05,
|
|
"loss": 0.1014,
|
|
"step": 790
|
|
},
|
|
{
|
|
"epoch": 13.28,
|
|
"learning_rate": 3.240740740740741e-05,
|
|
"loss": 0.1055,
|
|
"step": 800
|
|
},
|
|
{
|
|
"epoch": 13.44,
|
|
"learning_rate": 3.232638888888889e-05,
|
|
"loss": 0.1317,
|
|
"step": 810
|
|
},
|
|
{
|
|
"epoch": 13.61,
|
|
"learning_rate": 3.2245370370370366e-05,
|
|
"loss": 0.0899,
|
|
"step": 820
|
|
},
|
|
{
|
|
"epoch": 13.78,
|
|
"learning_rate": 3.216435185185185e-05,
|
|
"loss": 0.1158,
|
|
"step": 830
|
|
},
|
|
{
|
|
"epoch": 13.94,
|
|
"learning_rate": 3.208333333333333e-05,
|
|
"loss": 0.1072,
|
|
"step": 840
|
|
},
|
|
{
|
|
"epoch": 13.99,
|
|
"eval_accuracy": 0.7391304347826086,
|
|
"eval_loss": 1.0017004013061523,
|
|
"eval_runtime": 0.6881,
|
|
"eval_samples_per_second": 66.851,
|
|
"eval_steps_per_second": 4.36,
|
|
"step": 843
|
|
},
|
|
{
|
|
"epoch": 14.11,
|
|
"learning_rate": 3.200231481481481e-05,
|
|
"loss": 0.0812,
|
|
"step": 850
|
|
},
|
|
{
|
|
"epoch": 14.27,
|
|
"learning_rate": 3.19212962962963e-05,
|
|
"loss": 0.1025,
|
|
"step": 860
|
|
},
|
|
{
|
|
"epoch": 14.44,
|
|
"learning_rate": 3.1840277777777776e-05,
|
|
"loss": 0.1296,
|
|
"step": 870
|
|
},
|
|
{
|
|
"epoch": 14.61,
|
|
"learning_rate": 3.1759259259259255e-05,
|
|
"loss": 0.146,
|
|
"step": 880
|
|
},
|
|
{
|
|
"epoch": 14.77,
|
|
"learning_rate": 3.1678240740740735e-05,
|
|
"loss": 0.0782,
|
|
"step": 890
|
|
},
|
|
{
|
|
"epoch": 14.94,
|
|
"learning_rate": 3.159722222222222e-05,
|
|
"loss": 0.0888,
|
|
"step": 900
|
|
},
|
|
{
|
|
"epoch": 14.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 0.921440064907074,
|
|
"eval_runtime": 0.714,
|
|
"eval_samples_per_second": 64.425,
|
|
"eval_steps_per_second": 4.202,
|
|
"step": 903
|
|
},
|
|
{
|
|
"epoch": 15.1,
|
|
"learning_rate": 3.15162037037037e-05,
|
|
"loss": 0.077,
|
|
"step": 910
|
|
},
|
|
{
|
|
"epoch": 15.27,
|
|
"learning_rate": 3.143518518518518e-05,
|
|
"loss": 0.0821,
|
|
"step": 920
|
|
},
|
|
{
|
|
"epoch": 15.44,
|
|
"learning_rate": 3.1354166666666665e-05,
|
|
"loss": 0.0692,
|
|
"step": 930
|
|
},
|
|
{
|
|
"epoch": 15.6,
|
|
"learning_rate": 3.1273148148148145e-05,
|
|
"loss": 0.0801,
|
|
"step": 940
|
|
},
|
|
{
|
|
"epoch": 15.77,
|
|
"learning_rate": 3.119212962962963e-05,
|
|
"loss": 0.1015,
|
|
"step": 950
|
|
},
|
|
{
|
|
"epoch": 15.93,
|
|
"learning_rate": 3.111111111111111e-05,
|
|
"loss": 0.0951,
|
|
"step": 960
|
|
},
|
|
{
|
|
"epoch": 16.0,
|
|
"eval_accuracy": 0.7608695652173914,
|
|
"eval_loss": 0.9156137704849243,
|
|
"eval_runtime": 0.6762,
|
|
"eval_samples_per_second": 68.028,
|
|
"eval_steps_per_second": 4.437,
|
|
"step": 964
|
|
},
|
|
{
|
|
"epoch": 16.1,
|
|
"learning_rate": 3.103009259259259e-05,
|
|
"loss": 0.0768,
|
|
"step": 970
|
|
},
|
|
{
|
|
"epoch": 16.27,
|
|
"learning_rate": 3.0949074074074075e-05,
|
|
"loss": 0.0494,
|
|
"step": 980
|
|
},
|
|
{
|
|
"epoch": 16.43,
|
|
"learning_rate": 3.0868055555555554e-05,
|
|
"loss": 0.1444,
|
|
"step": 990
|
|
},
|
|
{
|
|
"epoch": 16.6,
|
|
"learning_rate": 3.0787037037037034e-05,
|
|
"loss": 0.086,
|
|
"step": 1000
|
|
},
|
|
{
|
|
"epoch": 16.76,
|
|
"learning_rate": 3.070601851851851e-05,
|
|
"loss": 0.0883,
|
|
"step": 1010
|
|
},
|
|
{
|
|
"epoch": 16.93,
|
|
"learning_rate": 3.0625e-05,
|
|
"loss": 0.0714,
|
|
"step": 1020
|
|
},
|
|
{
|
|
"epoch": 17.0,
|
|
"eval_accuracy": 0.6956521739130435,
|
|
"eval_loss": 1.3116263151168823,
|
|
"eval_runtime": 0.6431,
|
|
"eval_samples_per_second": 71.525,
|
|
"eval_steps_per_second": 4.665,
|
|
"step": 1024
|
|
},
|
|
{
|
|
"epoch": 17.1,
|
|
"learning_rate": 3.054398148148148e-05,
|
|
"loss": 0.0637,
|
|
"step": 1030
|
|
},
|
|
{
|
|
"epoch": 17.26,
|
|
"learning_rate": 3.046296296296296e-05,
|
|
"loss": 0.0581,
|
|
"step": 1040
|
|
},
|
|
{
|
|
"epoch": 17.43,
|
|
"learning_rate": 3.0381944444444444e-05,
|
|
"loss": 0.1138,
|
|
"step": 1050
|
|
},
|
|
{
|
|
"epoch": 17.59,
|
|
"learning_rate": 3.0300925925925923e-05,
|
|
"loss": 0.0894,
|
|
"step": 1060
|
|
},
|
|
{
|
|
"epoch": 17.76,
|
|
"learning_rate": 3.0219907407407405e-05,
|
|
"loss": 0.065,
|
|
"step": 1070
|
|
},
|
|
{
|
|
"epoch": 17.93,
|
|
"learning_rate": 3.0138888888888888e-05,
|
|
"loss": 0.0804,
|
|
"step": 1080
|
|
},
|
|
{
|
|
"epoch": 17.99,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.1107388734817505,
|
|
"eval_runtime": 0.7361,
|
|
"eval_samples_per_second": 62.487,
|
|
"eval_steps_per_second": 4.075,
|
|
"step": 1084
|
|
},
|
|
{
|
|
"epoch": 18.09,
|
|
"learning_rate": 3.0057870370370367e-05,
|
|
"loss": 0.1195,
|
|
"step": 1090
|
|
},
|
|
{
|
|
"epoch": 18.26,
|
|
"learning_rate": 2.997685185185185e-05,
|
|
"loss": 0.117,
|
|
"step": 1100
|
|
},
|
|
{
|
|
"epoch": 18.42,
|
|
"learning_rate": 2.989583333333333e-05,
|
|
"loss": 0.0535,
|
|
"step": 1110
|
|
},
|
|
{
|
|
"epoch": 18.59,
|
|
"learning_rate": 2.9814814814814812e-05,
|
|
"loss": 0.0778,
|
|
"step": 1120
|
|
},
|
|
{
|
|
"epoch": 18.76,
|
|
"learning_rate": 2.9733796296296295e-05,
|
|
"loss": 0.1094,
|
|
"step": 1130
|
|
},
|
|
{
|
|
"epoch": 18.92,
|
|
"learning_rate": 2.9652777777777774e-05,
|
|
"loss": 0.08,
|
|
"step": 1140
|
|
},
|
|
{
|
|
"epoch": 18.99,
|
|
"eval_accuracy": 0.8478260869565217,
|
|
"eval_loss": 0.8104602694511414,
|
|
"eval_runtime": 0.9009,
|
|
"eval_samples_per_second": 51.063,
|
|
"eval_steps_per_second": 3.33,
|
|
"step": 1144
|
|
},
|
|
{
|
|
"epoch": 19.09,
|
|
"learning_rate": 2.9571759259259257e-05,
|
|
"loss": 0.058,
|
|
"step": 1150
|
|
},
|
|
{
|
|
"epoch": 19.25,
|
|
"learning_rate": 2.9490740740740736e-05,
|
|
"loss": 0.0811,
|
|
"step": 1160
|
|
},
|
|
{
|
|
"epoch": 19.42,
|
|
"learning_rate": 2.940972222222222e-05,
|
|
"loss": 0.0626,
|
|
"step": 1170
|
|
},
|
|
{
|
|
"epoch": 19.59,
|
|
"learning_rate": 2.9328703703703698e-05,
|
|
"loss": 0.0502,
|
|
"step": 1180
|
|
},
|
|
{
|
|
"epoch": 19.75,
|
|
"learning_rate": 2.9247685185185184e-05,
|
|
"loss": 0.0799,
|
|
"step": 1190
|
|
},
|
|
{
|
|
"epoch": 19.92,
|
|
"learning_rate": 2.9166666666666666e-05,
|
|
"loss": 0.1619,
|
|
"step": 1200
|
|
},
|
|
{
|
|
"epoch": 20.0,
|
|
"eval_accuracy": 0.8260869565217391,
|
|
"eval_loss": 0.7581048011779785,
|
|
"eval_runtime": 0.6451,
|
|
"eval_samples_per_second": 71.304,
|
|
"eval_steps_per_second": 4.65,
|
|
"step": 1205
|
|
},
|
|
{
|
|
"epoch": 20.08,
|
|
"learning_rate": 2.9085648148148146e-05,
|
|
"loss": 0.0787,
|
|
"step": 1210
|
|
},
|
|
{
|
|
"epoch": 20.25,
|
|
"learning_rate": 2.9004629629629628e-05,
|
|
"loss": 0.0764,
|
|
"step": 1220
|
|
},
|
|
{
|
|
"epoch": 20.41,
|
|
"learning_rate": 2.8923611111111108e-05,
|
|
"loss": 0.0618,
|
|
"step": 1230
|
|
},
|
|
{
|
|
"epoch": 20.58,
|
|
"learning_rate": 2.884259259259259e-05,
|
|
"loss": 0.0612,
|
|
"step": 1240
|
|
},
|
|
{
|
|
"epoch": 20.75,
|
|
"learning_rate": 2.8761574074074073e-05,
|
|
"loss": 0.0666,
|
|
"step": 1250
|
|
},
|
|
{
|
|
"epoch": 20.91,
|
|
"learning_rate": 2.8680555555555552e-05,
|
|
"loss": 0.084,
|
|
"step": 1260
|
|
},
|
|
{
|
|
"epoch": 21.0,
|
|
"eval_accuracy": 0.8260869565217391,
|
|
"eval_loss": 1.0210282802581787,
|
|
"eval_runtime": 0.6611,
|
|
"eval_samples_per_second": 69.584,
|
|
"eval_steps_per_second": 4.538,
|
|
"step": 1265
|
|
},
|
|
{
|
|
"epoch": 21.08,
|
|
"learning_rate": 2.8599537037037035e-05,
|
|
"loss": 0.068,
|
|
"step": 1270
|
|
},
|
|
{
|
|
"epoch": 21.24,
|
|
"learning_rate": 2.8518518518518514e-05,
|
|
"loss": 0.0447,
|
|
"step": 1280
|
|
},
|
|
{
|
|
"epoch": 21.41,
|
|
"learning_rate": 2.8437499999999997e-05,
|
|
"loss": 0.0568,
|
|
"step": 1290
|
|
},
|
|
{
|
|
"epoch": 21.58,
|
|
"learning_rate": 2.835648148148148e-05,
|
|
"loss": 0.0823,
|
|
"step": 1300
|
|
},
|
|
{
|
|
"epoch": 21.74,
|
|
"learning_rate": 2.827546296296296e-05,
|
|
"loss": 0.0473,
|
|
"step": 1310
|
|
},
|
|
{
|
|
"epoch": 21.91,
|
|
"learning_rate": 2.819444444444444e-05,
|
|
"loss": 0.072,
|
|
"step": 1320
|
|
},
|
|
{
|
|
"epoch": 21.99,
|
|
"eval_accuracy": 0.7608695652173914,
|
|
"eval_loss": 1.3092247247695923,
|
|
"eval_runtime": 0.6707,
|
|
"eval_samples_per_second": 68.585,
|
|
"eval_steps_per_second": 4.473,
|
|
"step": 1325
|
|
},
|
|
{
|
|
"epoch": 22.07,
|
|
"learning_rate": 2.811342592592592e-05,
|
|
"loss": 0.0629,
|
|
"step": 1330
|
|
},
|
|
{
|
|
"epoch": 22.24,
|
|
"learning_rate": 2.8032407407407407e-05,
|
|
"loss": 0.0908,
|
|
"step": 1340
|
|
},
|
|
{
|
|
"epoch": 22.41,
|
|
"learning_rate": 2.795138888888889e-05,
|
|
"loss": 0.0744,
|
|
"step": 1350
|
|
},
|
|
{
|
|
"epoch": 22.57,
|
|
"learning_rate": 2.787037037037037e-05,
|
|
"loss": 0.055,
|
|
"step": 1360
|
|
},
|
|
{
|
|
"epoch": 22.74,
|
|
"learning_rate": 2.778935185185185e-05,
|
|
"loss": 0.0572,
|
|
"step": 1370
|
|
},
|
|
{
|
|
"epoch": 22.9,
|
|
"learning_rate": 2.770833333333333e-05,
|
|
"loss": 0.0303,
|
|
"step": 1380
|
|
},
|
|
{
|
|
"epoch": 22.99,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.3367185592651367,
|
|
"eval_runtime": 0.665,
|
|
"eval_samples_per_second": 69.17,
|
|
"eval_steps_per_second": 4.511,
|
|
"step": 1385
|
|
},
|
|
{
|
|
"epoch": 23.07,
|
|
"learning_rate": 2.7627314814814813e-05,
|
|
"loss": 0.0785,
|
|
"step": 1390
|
|
},
|
|
{
|
|
"epoch": 23.24,
|
|
"learning_rate": 2.7546296296296296e-05,
|
|
"loss": 0.0877,
|
|
"step": 1400
|
|
},
|
|
{
|
|
"epoch": 23.4,
|
|
"learning_rate": 2.7465277777777775e-05,
|
|
"loss": 0.0482,
|
|
"step": 1410
|
|
},
|
|
{
|
|
"epoch": 23.57,
|
|
"learning_rate": 2.7384259259259258e-05,
|
|
"loss": 0.0712,
|
|
"step": 1420
|
|
},
|
|
{
|
|
"epoch": 23.73,
|
|
"learning_rate": 2.7303240740740737e-05,
|
|
"loss": 0.0381,
|
|
"step": 1430
|
|
},
|
|
{
|
|
"epoch": 23.9,
|
|
"learning_rate": 2.722222222222222e-05,
|
|
"loss": 0.0228,
|
|
"step": 1440
|
|
},
|
|
{
|
|
"epoch": 24.0,
|
|
"eval_accuracy": 0.8260869565217391,
|
|
"eval_loss": 1.0276718139648438,
|
|
"eval_runtime": 0.7532,
|
|
"eval_samples_per_second": 61.074,
|
|
"eval_steps_per_second": 3.983,
|
|
"step": 1446
|
|
},
|
|
{
|
|
"epoch": 24.07,
|
|
"learning_rate": 2.71412037037037e-05,
|
|
"loss": 0.0696,
|
|
"step": 1450
|
|
},
|
|
{
|
|
"epoch": 24.23,
|
|
"learning_rate": 2.706018518518518e-05,
|
|
"loss": 0.0612,
|
|
"step": 1460
|
|
},
|
|
{
|
|
"epoch": 24.4,
|
|
"learning_rate": 2.6979166666666664e-05,
|
|
"loss": 0.0549,
|
|
"step": 1470
|
|
},
|
|
{
|
|
"epoch": 24.56,
|
|
"learning_rate": 2.6898148148148143e-05,
|
|
"loss": 0.0253,
|
|
"step": 1480
|
|
},
|
|
{
|
|
"epoch": 24.73,
|
|
"learning_rate": 2.681712962962963e-05,
|
|
"loss": 0.0472,
|
|
"step": 1490
|
|
},
|
|
{
|
|
"epoch": 24.9,
|
|
"learning_rate": 2.673611111111111e-05,
|
|
"loss": 0.0755,
|
|
"step": 1500
|
|
},
|
|
{
|
|
"epoch": 25.0,
|
|
"eval_accuracy": 0.8260869565217391,
|
|
"eval_loss": 0.9436376094818115,
|
|
"eval_runtime": 0.6656,
|
|
"eval_samples_per_second": 69.111,
|
|
"eval_steps_per_second": 4.507,
|
|
"step": 1506
|
|
},
|
|
{
|
|
"epoch": 25.06,
|
|
"learning_rate": 2.665509259259259e-05,
|
|
"loss": 0.0743,
|
|
"step": 1510
|
|
},
|
|
{
|
|
"epoch": 25.23,
|
|
"learning_rate": 2.6574074074074074e-05,
|
|
"loss": 0.045,
|
|
"step": 1520
|
|
},
|
|
{
|
|
"epoch": 25.39,
|
|
"learning_rate": 2.6493055555555553e-05,
|
|
"loss": 0.0669,
|
|
"step": 1530
|
|
},
|
|
{
|
|
"epoch": 25.56,
|
|
"learning_rate": 2.6412037037037036e-05,
|
|
"loss": 0.0551,
|
|
"step": 1540
|
|
},
|
|
{
|
|
"epoch": 25.73,
|
|
"learning_rate": 2.6331018518518515e-05,
|
|
"loss": 0.0256,
|
|
"step": 1550
|
|
},
|
|
{
|
|
"epoch": 25.89,
|
|
"learning_rate": 2.6249999999999998e-05,
|
|
"loss": 0.0756,
|
|
"step": 1560
|
|
},
|
|
{
|
|
"epoch": 25.99,
|
|
"eval_accuracy": 0.7608695652173914,
|
|
"eval_loss": 1.1588078737258911,
|
|
"eval_runtime": 0.6572,
|
|
"eval_samples_per_second": 69.997,
|
|
"eval_steps_per_second": 4.565,
|
|
"step": 1566
|
|
},
|
|
{
|
|
"epoch": 26.06,
|
|
"learning_rate": 2.616898148148148e-05,
|
|
"loss": 0.0403,
|
|
"step": 1570
|
|
},
|
|
{
|
|
"epoch": 26.22,
|
|
"learning_rate": 2.608796296296296e-05,
|
|
"loss": 0.0486,
|
|
"step": 1580
|
|
},
|
|
{
|
|
"epoch": 26.39,
|
|
"learning_rate": 2.6006944444444442e-05,
|
|
"loss": 0.0489,
|
|
"step": 1590
|
|
},
|
|
{
|
|
"epoch": 26.56,
|
|
"learning_rate": 2.592592592592592e-05,
|
|
"loss": 0.0646,
|
|
"step": 1600
|
|
},
|
|
{
|
|
"epoch": 26.72,
|
|
"learning_rate": 2.5844907407407404e-05,
|
|
"loss": 0.0713,
|
|
"step": 1610
|
|
},
|
|
{
|
|
"epoch": 26.89,
|
|
"learning_rate": 2.5763888888888887e-05,
|
|
"loss": 0.0875,
|
|
"step": 1620
|
|
},
|
|
{
|
|
"epoch": 26.99,
|
|
"eval_accuracy": 0.717391304347826,
|
|
"eval_loss": 1.3280256986618042,
|
|
"eval_runtime": 0.7112,
|
|
"eval_samples_per_second": 64.681,
|
|
"eval_steps_per_second": 4.218,
|
|
"step": 1626
|
|
},
|
|
{
|
|
"epoch": 27.05,
|
|
"learning_rate": 2.5682870370370366e-05,
|
|
"loss": 0.0704,
|
|
"step": 1630
|
|
},
|
|
{
|
|
"epoch": 27.22,
|
|
"learning_rate": 2.5601851851851852e-05,
|
|
"loss": 0.0606,
|
|
"step": 1640
|
|
},
|
|
{
|
|
"epoch": 27.39,
|
|
"learning_rate": 2.552083333333333e-05,
|
|
"loss": 0.0385,
|
|
"step": 1650
|
|
},
|
|
{
|
|
"epoch": 27.55,
|
|
"learning_rate": 2.5439814814814814e-05,
|
|
"loss": 0.0494,
|
|
"step": 1660
|
|
},
|
|
{
|
|
"epoch": 27.72,
|
|
"learning_rate": 2.5358796296296297e-05,
|
|
"loss": 0.0171,
|
|
"step": 1670
|
|
},
|
|
{
|
|
"epoch": 27.88,
|
|
"learning_rate": 2.5277777777777776e-05,
|
|
"loss": 0.0771,
|
|
"step": 1680
|
|
},
|
|
{
|
|
"epoch": 28.0,
|
|
"eval_accuracy": 0.6739130434782609,
|
|
"eval_loss": 1.8558253049850464,
|
|
"eval_runtime": 0.6528,
|
|
"eval_samples_per_second": 70.468,
|
|
"eval_steps_per_second": 4.596,
|
|
"step": 1687
|
|
},
|
|
{
|
|
"epoch": 28.05,
|
|
"learning_rate": 2.519675925925926e-05,
|
|
"loss": 0.0447,
|
|
"step": 1690
|
|
},
|
|
{
|
|
"epoch": 28.22,
|
|
"learning_rate": 2.5115740740740738e-05,
|
|
"loss": 0.0365,
|
|
"step": 1700
|
|
},
|
|
{
|
|
"epoch": 28.38,
|
|
"learning_rate": 2.503472222222222e-05,
|
|
"loss": 0.0557,
|
|
"step": 1710
|
|
},
|
|
{
|
|
"epoch": 28.55,
|
|
"learning_rate": 2.49537037037037e-05,
|
|
"loss": 0.0447,
|
|
"step": 1720
|
|
},
|
|
{
|
|
"epoch": 28.71,
|
|
"learning_rate": 2.4872685185185182e-05,
|
|
"loss": 0.0412,
|
|
"step": 1730
|
|
},
|
|
{
|
|
"epoch": 28.88,
|
|
"learning_rate": 2.4791666666666665e-05,
|
|
"loss": 0.0467,
|
|
"step": 1740
|
|
},
|
|
{
|
|
"epoch": 29.0,
|
|
"eval_accuracy": 0.7391304347826086,
|
|
"eval_loss": 1.6476134061813354,
|
|
"eval_runtime": 0.6892,
|
|
"eval_samples_per_second": 66.748,
|
|
"eval_steps_per_second": 4.353,
|
|
"step": 1747
|
|
},
|
|
{
|
|
"epoch": 29.05,
|
|
"learning_rate": 2.4710648148148144e-05,
|
|
"loss": 0.0483,
|
|
"step": 1750
|
|
},
|
|
{
|
|
"epoch": 29.21,
|
|
"learning_rate": 2.4629629629629627e-05,
|
|
"loss": 0.1003,
|
|
"step": 1760
|
|
},
|
|
{
|
|
"epoch": 29.38,
|
|
"learning_rate": 2.4548611111111106e-05,
|
|
"loss": 0.0879,
|
|
"step": 1770
|
|
},
|
|
{
|
|
"epoch": 29.54,
|
|
"learning_rate": 2.446759259259259e-05,
|
|
"loss": 0.0333,
|
|
"step": 1780
|
|
},
|
|
{
|
|
"epoch": 29.71,
|
|
"learning_rate": 2.4386574074074075e-05,
|
|
"loss": 0.0849,
|
|
"step": 1790
|
|
},
|
|
{
|
|
"epoch": 29.88,
|
|
"learning_rate": 2.4305555555555554e-05,
|
|
"loss": 0.0382,
|
|
"step": 1800
|
|
},
|
|
{
|
|
"epoch": 29.99,
|
|
"eval_accuracy": 0.8478260869565217,
|
|
"eval_loss": 0.9374191164970398,
|
|
"eval_runtime": 0.7092,
|
|
"eval_samples_per_second": 64.864,
|
|
"eval_steps_per_second": 4.23,
|
|
"step": 1807
|
|
},
|
|
{
|
|
"epoch": 30.04,
|
|
"learning_rate": 2.4224537037037037e-05,
|
|
"loss": 0.0482,
|
|
"step": 1810
|
|
},
|
|
{
|
|
"epoch": 30.21,
|
|
"learning_rate": 2.4143518518518516e-05,
|
|
"loss": 0.0232,
|
|
"step": 1820
|
|
},
|
|
{
|
|
"epoch": 30.37,
|
|
"learning_rate": 2.40625e-05,
|
|
"loss": 0.0342,
|
|
"step": 1830
|
|
},
|
|
{
|
|
"epoch": 30.54,
|
|
"learning_rate": 2.398148148148148e-05,
|
|
"loss": 0.0344,
|
|
"step": 1840
|
|
},
|
|
{
|
|
"epoch": 30.71,
|
|
"learning_rate": 2.390046296296296e-05,
|
|
"loss": 0.0325,
|
|
"step": 1850
|
|
},
|
|
{
|
|
"epoch": 30.87,
|
|
"learning_rate": 2.3819444444444443e-05,
|
|
"loss": 0.0511,
|
|
"step": 1860
|
|
},
|
|
{
|
|
"epoch": 30.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.0847010612487793,
|
|
"eval_runtime": 0.7072,
|
|
"eval_samples_per_second": 65.049,
|
|
"eval_steps_per_second": 4.242,
|
|
"step": 1867
|
|
},
|
|
{
|
|
"epoch": 31.04,
|
|
"learning_rate": 2.3738425925925923e-05,
|
|
"loss": 0.0243,
|
|
"step": 1870
|
|
},
|
|
{
|
|
"epoch": 31.2,
|
|
"learning_rate": 2.3657407407407405e-05,
|
|
"loss": 0.0466,
|
|
"step": 1880
|
|
},
|
|
{
|
|
"epoch": 31.37,
|
|
"learning_rate": 2.3576388888888888e-05,
|
|
"loss": 0.0543,
|
|
"step": 1890
|
|
},
|
|
{
|
|
"epoch": 31.54,
|
|
"learning_rate": 2.3495370370370367e-05,
|
|
"loss": 0.0415,
|
|
"step": 1900
|
|
},
|
|
{
|
|
"epoch": 31.7,
|
|
"learning_rate": 2.341435185185185e-05,
|
|
"loss": 0.0327,
|
|
"step": 1910
|
|
},
|
|
{
|
|
"epoch": 31.87,
|
|
"learning_rate": 2.333333333333333e-05,
|
|
"loss": 0.0161,
|
|
"step": 1920
|
|
},
|
|
{
|
|
"epoch": 32.0,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.2028273344039917,
|
|
"eval_runtime": 0.6812,
|
|
"eval_samples_per_second": 67.531,
|
|
"eval_steps_per_second": 4.404,
|
|
"step": 1928
|
|
},
|
|
{
|
|
"epoch": 32.03,
|
|
"learning_rate": 2.325231481481481e-05,
|
|
"loss": 0.0372,
|
|
"step": 1930
|
|
},
|
|
{
|
|
"epoch": 32.2,
|
|
"learning_rate": 2.3171296296296298e-05,
|
|
"loss": 0.0433,
|
|
"step": 1940
|
|
},
|
|
{
|
|
"epoch": 32.37,
|
|
"learning_rate": 2.3090277777777777e-05,
|
|
"loss": 0.0536,
|
|
"step": 1950
|
|
},
|
|
{
|
|
"epoch": 32.53,
|
|
"learning_rate": 2.300925925925926e-05,
|
|
"loss": 0.0497,
|
|
"step": 1960
|
|
},
|
|
{
|
|
"epoch": 32.7,
|
|
"learning_rate": 2.292824074074074e-05,
|
|
"loss": 0.0317,
|
|
"step": 1970
|
|
},
|
|
{
|
|
"epoch": 32.86,
|
|
"learning_rate": 2.284722222222222e-05,
|
|
"loss": 0.0301,
|
|
"step": 1980
|
|
},
|
|
{
|
|
"epoch": 33.0,
|
|
"eval_accuracy": 0.7391304347826086,
|
|
"eval_loss": 1.297067403793335,
|
|
"eval_runtime": 0.6982,
|
|
"eval_samples_per_second": 65.888,
|
|
"eval_steps_per_second": 4.297,
|
|
"step": 1988
|
|
},
|
|
{
|
|
"epoch": 33.03,
|
|
"learning_rate": 2.27662037037037e-05,
|
|
"loss": 0.0488,
|
|
"step": 1990
|
|
},
|
|
{
|
|
"epoch": 33.2,
|
|
"learning_rate": 2.2685185185185183e-05,
|
|
"loss": 0.0382,
|
|
"step": 2000
|
|
},
|
|
{
|
|
"epoch": 33.36,
|
|
"learning_rate": 2.2604166666666666e-05,
|
|
"loss": 0.0576,
|
|
"step": 2010
|
|
},
|
|
{
|
|
"epoch": 33.53,
|
|
"learning_rate": 2.2523148148148145e-05,
|
|
"loss": 0.0111,
|
|
"step": 2020
|
|
},
|
|
{
|
|
"epoch": 33.69,
|
|
"learning_rate": 2.2442129629629628e-05,
|
|
"loss": 0.0685,
|
|
"step": 2030
|
|
},
|
|
{
|
|
"epoch": 33.86,
|
|
"learning_rate": 2.2361111111111107e-05,
|
|
"loss": 0.0443,
|
|
"step": 2040
|
|
},
|
|
{
|
|
"epoch": 33.99,
|
|
"eval_accuracy": 0.717391304347826,
|
|
"eval_loss": 1.3993148803710938,
|
|
"eval_runtime": 0.6712,
|
|
"eval_samples_per_second": 68.537,
|
|
"eval_steps_per_second": 4.47,
|
|
"step": 2048
|
|
},
|
|
{
|
|
"epoch": 34.02,
|
|
"learning_rate": 2.228009259259259e-05,
|
|
"loss": 0.0488,
|
|
"step": 2050
|
|
},
|
|
{
|
|
"epoch": 34.19,
|
|
"learning_rate": 2.2199074074074073e-05,
|
|
"loss": 0.0322,
|
|
"step": 2060
|
|
},
|
|
{
|
|
"epoch": 34.36,
|
|
"learning_rate": 2.2118055555555552e-05,
|
|
"loss": 0.0353,
|
|
"step": 2070
|
|
},
|
|
{
|
|
"epoch": 34.52,
|
|
"learning_rate": 2.2037037037037034e-05,
|
|
"loss": 0.0387,
|
|
"step": 2080
|
|
},
|
|
{
|
|
"epoch": 34.69,
|
|
"learning_rate": 2.1956018518518514e-05,
|
|
"loss": 0.086,
|
|
"step": 2090
|
|
},
|
|
{
|
|
"epoch": 34.85,
|
|
"learning_rate": 2.1874999999999996e-05,
|
|
"loss": 0.0782,
|
|
"step": 2100
|
|
},
|
|
{
|
|
"epoch": 34.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.3358551263809204,
|
|
"eval_runtime": 0.6692,
|
|
"eval_samples_per_second": 68.742,
|
|
"eval_steps_per_second": 4.483,
|
|
"step": 2108
|
|
},
|
|
{
|
|
"epoch": 35.02,
|
|
"learning_rate": 2.1793981481481482e-05,
|
|
"loss": 0.0232,
|
|
"step": 2110
|
|
},
|
|
{
|
|
"epoch": 35.19,
|
|
"learning_rate": 2.171296296296296e-05,
|
|
"loss": 0.0295,
|
|
"step": 2120
|
|
},
|
|
{
|
|
"epoch": 35.35,
|
|
"learning_rate": 2.1631944444444444e-05,
|
|
"loss": 0.0456,
|
|
"step": 2130
|
|
},
|
|
{
|
|
"epoch": 35.52,
|
|
"learning_rate": 2.1550925925925924e-05,
|
|
"loss": 0.0266,
|
|
"step": 2140
|
|
},
|
|
{
|
|
"epoch": 35.68,
|
|
"learning_rate": 2.1469907407407406e-05,
|
|
"loss": 0.0462,
|
|
"step": 2150
|
|
},
|
|
{
|
|
"epoch": 35.85,
|
|
"learning_rate": 2.138888888888889e-05,
|
|
"loss": 0.0287,
|
|
"step": 2160
|
|
},
|
|
{
|
|
"epoch": 36.0,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.3011459112167358,
|
|
"eval_runtime": 0.7002,
|
|
"eval_samples_per_second": 65.698,
|
|
"eval_steps_per_second": 4.285,
|
|
"step": 2169
|
|
},
|
|
{
|
|
"epoch": 36.02,
|
|
"learning_rate": 2.1307870370370368e-05,
|
|
"loss": 0.0502,
|
|
"step": 2170
|
|
},
|
|
{
|
|
"epoch": 36.18,
|
|
"learning_rate": 2.122685185185185e-05,
|
|
"loss": 0.0499,
|
|
"step": 2180
|
|
},
|
|
{
|
|
"epoch": 36.35,
|
|
"learning_rate": 2.114583333333333e-05,
|
|
"loss": 0.0313,
|
|
"step": 2190
|
|
},
|
|
{
|
|
"epoch": 36.51,
|
|
"learning_rate": 2.1064814814814813e-05,
|
|
"loss": 0.0467,
|
|
"step": 2200
|
|
},
|
|
{
|
|
"epoch": 36.68,
|
|
"learning_rate": 2.0983796296296295e-05,
|
|
"loss": 0.0496,
|
|
"step": 2210
|
|
},
|
|
{
|
|
"epoch": 36.85,
|
|
"learning_rate": 2.0902777777777775e-05,
|
|
"loss": 0.0347,
|
|
"step": 2220
|
|
},
|
|
{
|
|
"epoch": 37.0,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.2450307607650757,
|
|
"eval_runtime": 0.7057,
|
|
"eval_samples_per_second": 65.186,
|
|
"eval_steps_per_second": 4.251,
|
|
"step": 2229
|
|
},
|
|
{
|
|
"epoch": 37.01,
|
|
"learning_rate": 2.0821759259259257e-05,
|
|
"loss": 0.0322,
|
|
"step": 2230
|
|
},
|
|
{
|
|
"epoch": 37.18,
|
|
"learning_rate": 2.0740740740740737e-05,
|
|
"loss": 0.0213,
|
|
"step": 2240
|
|
},
|
|
{
|
|
"epoch": 37.34,
|
|
"learning_rate": 2.065972222222222e-05,
|
|
"loss": 0.0215,
|
|
"step": 2250
|
|
},
|
|
{
|
|
"epoch": 37.51,
|
|
"learning_rate": 2.05787037037037e-05,
|
|
"loss": 0.0172,
|
|
"step": 2260
|
|
},
|
|
{
|
|
"epoch": 37.68,
|
|
"learning_rate": 2.0497685185185184e-05,
|
|
"loss": 0.0264,
|
|
"step": 2270
|
|
},
|
|
{
|
|
"epoch": 37.84,
|
|
"learning_rate": 2.0416666666666667e-05,
|
|
"loss": 0.0538,
|
|
"step": 2280
|
|
},
|
|
{
|
|
"epoch": 37.99,
|
|
"eval_accuracy": 0.7608695652173914,
|
|
"eval_loss": 1.8215843439102173,
|
|
"eval_runtime": 0.6952,
|
|
"eval_samples_per_second": 66.171,
|
|
"eval_steps_per_second": 4.315,
|
|
"step": 2289
|
|
},
|
|
{
|
|
"epoch": 38.01,
|
|
"learning_rate": 2.0335648148148146e-05,
|
|
"loss": 0.0541,
|
|
"step": 2290
|
|
},
|
|
{
|
|
"epoch": 38.17,
|
|
"learning_rate": 2.025462962962963e-05,
|
|
"loss": 0.0261,
|
|
"step": 2300
|
|
},
|
|
{
|
|
"epoch": 38.34,
|
|
"learning_rate": 2.017361111111111e-05,
|
|
"loss": 0.0371,
|
|
"step": 2310
|
|
},
|
|
{
|
|
"epoch": 38.51,
|
|
"learning_rate": 2.009259259259259e-05,
|
|
"loss": 0.028,
|
|
"step": 2320
|
|
},
|
|
{
|
|
"epoch": 38.67,
|
|
"learning_rate": 2.0011574074074074e-05,
|
|
"loss": 0.0167,
|
|
"step": 2330
|
|
},
|
|
{
|
|
"epoch": 38.84,
|
|
"learning_rate": 1.9930555555555553e-05,
|
|
"loss": 0.027,
|
|
"step": 2340
|
|
},
|
|
{
|
|
"epoch": 38.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.170117735862732,
|
|
"eval_runtime": 0.7309,
|
|
"eval_samples_per_second": 62.933,
|
|
"eval_steps_per_second": 4.104,
|
|
"step": 2349
|
|
},
|
|
{
|
|
"epoch": 39.0,
|
|
"learning_rate": 1.9849537037037036e-05,
|
|
"loss": 0.0375,
|
|
"step": 2350
|
|
},
|
|
{
|
|
"epoch": 39.17,
|
|
"learning_rate": 1.9768518518518515e-05,
|
|
"loss": 0.0467,
|
|
"step": 2360
|
|
},
|
|
{
|
|
"epoch": 39.34,
|
|
"learning_rate": 1.9687499999999997e-05,
|
|
"loss": 0.03,
|
|
"step": 2370
|
|
},
|
|
{
|
|
"epoch": 39.5,
|
|
"learning_rate": 1.960648148148148e-05,
|
|
"loss": 0.0243,
|
|
"step": 2380
|
|
},
|
|
{
|
|
"epoch": 39.67,
|
|
"learning_rate": 1.952546296296296e-05,
|
|
"loss": 0.0393,
|
|
"step": 2390
|
|
},
|
|
{
|
|
"epoch": 39.83,
|
|
"learning_rate": 1.9444444444444442e-05,
|
|
"loss": 0.0284,
|
|
"step": 2400
|
|
},
|
|
{
|
|
"epoch": 40.0,
|
|
"learning_rate": 1.936342592592592e-05,
|
|
"loss": 0.038,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 40.0,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.1025199890136719,
|
|
"eval_runtime": 0.7243,
|
|
"eval_samples_per_second": 63.514,
|
|
"eval_steps_per_second": 4.142,
|
|
"step": 2410
|
|
},
|
|
{
|
|
"epoch": 40.17,
|
|
"learning_rate": 1.9282407407407407e-05,
|
|
"loss": 0.0243,
|
|
"step": 2420
|
|
},
|
|
{
|
|
"epoch": 40.33,
|
|
"learning_rate": 1.920138888888889e-05,
|
|
"loss": 0.049,
|
|
"step": 2430
|
|
},
|
|
{
|
|
"epoch": 40.5,
|
|
"learning_rate": 1.912037037037037e-05,
|
|
"loss": 0.0319,
|
|
"step": 2440
|
|
},
|
|
{
|
|
"epoch": 40.66,
|
|
"learning_rate": 1.9039351851851852e-05,
|
|
"loss": 0.0413,
|
|
"step": 2450
|
|
},
|
|
{
|
|
"epoch": 40.83,
|
|
"learning_rate": 1.895833333333333e-05,
|
|
"loss": 0.0369,
|
|
"step": 2460
|
|
},
|
|
{
|
|
"epoch": 41.0,
|
|
"learning_rate": 1.8877314814814814e-05,
|
|
"loss": 0.0244,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 41.0,
|
|
"eval_accuracy": 0.7608695652173914,
|
|
"eval_loss": 1.2912131547927856,
|
|
"eval_runtime": 0.6786,
|
|
"eval_samples_per_second": 67.783,
|
|
"eval_steps_per_second": 4.421,
|
|
"step": 2470
|
|
},
|
|
{
|
|
"epoch": 41.16,
|
|
"learning_rate": 1.8796296296296296e-05,
|
|
"loss": 0.0467,
|
|
"step": 2480
|
|
},
|
|
{
|
|
"epoch": 41.33,
|
|
"learning_rate": 1.8715277777777776e-05,
|
|
"loss": 0.0295,
|
|
"step": 2490
|
|
},
|
|
{
|
|
"epoch": 41.49,
|
|
"learning_rate": 1.863425925925926e-05,
|
|
"loss": 0.0241,
|
|
"step": 2500
|
|
},
|
|
{
|
|
"epoch": 41.66,
|
|
"learning_rate": 1.8553240740740738e-05,
|
|
"loss": 0.0238,
|
|
"step": 2510
|
|
},
|
|
{
|
|
"epoch": 41.83,
|
|
"learning_rate": 1.847222222222222e-05,
|
|
"loss": 0.0201,
|
|
"step": 2520
|
|
},
|
|
{
|
|
"epoch": 41.99,
|
|
"learning_rate": 1.83912037037037e-05,
|
|
"loss": 0.0122,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 41.99,
|
|
"eval_accuracy": 0.7608695652173914,
|
|
"eval_loss": 1.5698707103729248,
|
|
"eval_runtime": 0.7096,
|
|
"eval_samples_per_second": 64.826,
|
|
"eval_steps_per_second": 4.228,
|
|
"step": 2530
|
|
},
|
|
{
|
|
"epoch": 42.16,
|
|
"learning_rate": 1.8310185185185182e-05,
|
|
"loss": 0.0321,
|
|
"step": 2540
|
|
},
|
|
{
|
|
"epoch": 42.32,
|
|
"learning_rate": 1.8229166666666665e-05,
|
|
"loss": 0.0336,
|
|
"step": 2550
|
|
},
|
|
{
|
|
"epoch": 42.49,
|
|
"learning_rate": 1.8148148148148144e-05,
|
|
"loss": 0.0143,
|
|
"step": 2560
|
|
},
|
|
{
|
|
"epoch": 42.66,
|
|
"learning_rate": 1.806712962962963e-05,
|
|
"loss": 0.0205,
|
|
"step": 2570
|
|
},
|
|
{
|
|
"epoch": 42.82,
|
|
"learning_rate": 1.798611111111111e-05,
|
|
"loss": 0.0347,
|
|
"step": 2580
|
|
},
|
|
{
|
|
"epoch": 42.99,
|
|
"learning_rate": 1.7905092592592592e-05,
|
|
"loss": 0.023,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 42.99,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.51142156124115,
|
|
"eval_runtime": 0.7048,
|
|
"eval_samples_per_second": 65.263,
|
|
"eval_steps_per_second": 4.256,
|
|
"step": 2590
|
|
},
|
|
{
|
|
"epoch": 43.15,
|
|
"learning_rate": 1.7824074074074075e-05,
|
|
"loss": 0.025,
|
|
"step": 2600
|
|
},
|
|
{
|
|
"epoch": 43.32,
|
|
"learning_rate": 1.7743055555555554e-05,
|
|
"loss": 0.0213,
|
|
"step": 2610
|
|
},
|
|
{
|
|
"epoch": 43.49,
|
|
"learning_rate": 1.7662037037037037e-05,
|
|
"loss": 0.0491,
|
|
"step": 2620
|
|
},
|
|
{
|
|
"epoch": 43.65,
|
|
"learning_rate": 1.7581018518518516e-05,
|
|
"loss": 0.0148,
|
|
"step": 2630
|
|
},
|
|
{
|
|
"epoch": 43.82,
|
|
"learning_rate": 1.75e-05,
|
|
"loss": 0.0267,
|
|
"step": 2640
|
|
},
|
|
{
|
|
"epoch": 43.98,
|
|
"learning_rate": 1.741898148148148e-05,
|
|
"loss": 0.0297,
|
|
"step": 2650
|
|
},
|
|
{
|
|
"epoch": 44.0,
|
|
"eval_accuracy": 0.8478260869565217,
|
|
"eval_loss": 1.2188609838485718,
|
|
"eval_runtime": 0.7237,
|
|
"eval_samples_per_second": 63.561,
|
|
"eval_steps_per_second": 4.145,
|
|
"step": 2651
|
|
},
|
|
{
|
|
"epoch": 44.15,
|
|
"learning_rate": 1.733796296296296e-05,
|
|
"loss": 0.0183,
|
|
"step": 2660
|
|
},
|
|
{
|
|
"epoch": 44.32,
|
|
"learning_rate": 1.7256944444444443e-05,
|
|
"loss": 0.0352,
|
|
"step": 2670
|
|
},
|
|
{
|
|
"epoch": 44.48,
|
|
"learning_rate": 1.7175925925925926e-05,
|
|
"loss": 0.0243,
|
|
"step": 2680
|
|
},
|
|
{
|
|
"epoch": 44.65,
|
|
"learning_rate": 1.7094907407407405e-05,
|
|
"loss": 0.104,
|
|
"step": 2690
|
|
},
|
|
{
|
|
"epoch": 44.81,
|
|
"learning_rate": 1.7013888888888888e-05,
|
|
"loss": 0.015,
|
|
"step": 2700
|
|
},
|
|
{
|
|
"epoch": 44.98,
|
|
"learning_rate": 1.6932870370370367e-05,
|
|
"loss": 0.0284,
|
|
"step": 2710
|
|
},
|
|
{
|
|
"epoch": 45.0,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.3997039794921875,
|
|
"eval_runtime": 0.7121,
|
|
"eval_samples_per_second": 64.595,
|
|
"eval_steps_per_second": 4.213,
|
|
"step": 2711
|
|
},
|
|
{
|
|
"epoch": 45.15,
|
|
"learning_rate": 1.685185185185185e-05,
|
|
"loss": 0.0133,
|
|
"step": 2720
|
|
},
|
|
{
|
|
"epoch": 45.31,
|
|
"learning_rate": 1.6770833333333332e-05,
|
|
"loss": 0.0159,
|
|
"step": 2730
|
|
},
|
|
{
|
|
"epoch": 45.48,
|
|
"learning_rate": 1.6689814814814815e-05,
|
|
"loss": 0.0183,
|
|
"step": 2740
|
|
},
|
|
{
|
|
"epoch": 45.64,
|
|
"learning_rate": 1.6608796296296294e-05,
|
|
"loss": 0.0302,
|
|
"step": 2750
|
|
},
|
|
{
|
|
"epoch": 45.81,
|
|
"learning_rate": 1.6527777777777777e-05,
|
|
"loss": 0.0232,
|
|
"step": 2760
|
|
},
|
|
{
|
|
"epoch": 45.98,
|
|
"learning_rate": 1.6446759259259256e-05,
|
|
"loss": 0.0203,
|
|
"step": 2770
|
|
},
|
|
{
|
|
"epoch": 45.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.4792205095291138,
|
|
"eval_runtime": 0.6983,
|
|
"eval_samples_per_second": 65.879,
|
|
"eval_steps_per_second": 4.296,
|
|
"step": 2771
|
|
},
|
|
{
|
|
"epoch": 46.14,
|
|
"learning_rate": 1.636574074074074e-05,
|
|
"loss": 0.037,
|
|
"step": 2780
|
|
},
|
|
{
|
|
"epoch": 46.31,
|
|
"learning_rate": 1.628472222222222e-05,
|
|
"loss": 0.0337,
|
|
"step": 2790
|
|
},
|
|
{
|
|
"epoch": 46.47,
|
|
"learning_rate": 1.6203703703703704e-05,
|
|
"loss": 0.012,
|
|
"step": 2800
|
|
},
|
|
{
|
|
"epoch": 46.64,
|
|
"learning_rate": 1.6122685185185183e-05,
|
|
"loss": 0.0568,
|
|
"step": 2810
|
|
},
|
|
{
|
|
"epoch": 46.8,
|
|
"learning_rate": 1.6041666666666666e-05,
|
|
"loss": 0.018,
|
|
"step": 2820
|
|
},
|
|
{
|
|
"epoch": 46.97,
|
|
"learning_rate": 1.596064814814815e-05,
|
|
"loss": 0.03,
|
|
"step": 2830
|
|
},
|
|
{
|
|
"epoch": 46.99,
|
|
"eval_accuracy": 0.717391304347826,
|
|
"eval_loss": 1.7487258911132812,
|
|
"eval_runtime": 0.7112,
|
|
"eval_samples_per_second": 64.68,
|
|
"eval_steps_per_second": 4.218,
|
|
"step": 2831
|
|
},
|
|
{
|
|
"epoch": 47.14,
|
|
"learning_rate": 1.5879629629629628e-05,
|
|
"loss": 0.0083,
|
|
"step": 2840
|
|
},
|
|
{
|
|
"epoch": 47.3,
|
|
"learning_rate": 1.579861111111111e-05,
|
|
"loss": 0.0431,
|
|
"step": 2850
|
|
},
|
|
{
|
|
"epoch": 47.47,
|
|
"learning_rate": 1.571759259259259e-05,
|
|
"loss": 0.0104,
|
|
"step": 2860
|
|
},
|
|
{
|
|
"epoch": 47.63,
|
|
"learning_rate": 1.5636574074074072e-05,
|
|
"loss": 0.0322,
|
|
"step": 2870
|
|
},
|
|
{
|
|
"epoch": 47.8,
|
|
"learning_rate": 1.5555555555555555e-05,
|
|
"loss": 0.0185,
|
|
"step": 2880
|
|
},
|
|
{
|
|
"epoch": 47.97,
|
|
"learning_rate": 1.5474537037037038e-05,
|
|
"loss": 0.025,
|
|
"step": 2890
|
|
},
|
|
{
|
|
"epoch": 48.0,
|
|
"eval_accuracy": 0.7608695652173914,
|
|
"eval_loss": 1.6605217456817627,
|
|
"eval_runtime": 0.6827,
|
|
"eval_samples_per_second": 67.383,
|
|
"eval_steps_per_second": 4.395,
|
|
"step": 2892
|
|
},
|
|
{
|
|
"epoch": 48.13,
|
|
"learning_rate": 1.5393518518518517e-05,
|
|
"loss": 0.0269,
|
|
"step": 2900
|
|
},
|
|
{
|
|
"epoch": 48.3,
|
|
"learning_rate": 1.53125e-05,
|
|
"loss": 0.0274,
|
|
"step": 2910
|
|
},
|
|
{
|
|
"epoch": 48.46,
|
|
"learning_rate": 1.523148148148148e-05,
|
|
"loss": 0.033,
|
|
"step": 2920
|
|
},
|
|
{
|
|
"epoch": 48.63,
|
|
"learning_rate": 1.5150462962962961e-05,
|
|
"loss": 0.0241,
|
|
"step": 2930
|
|
},
|
|
{
|
|
"epoch": 48.8,
|
|
"learning_rate": 1.5069444444444444e-05,
|
|
"loss": 0.0445,
|
|
"step": 2940
|
|
},
|
|
{
|
|
"epoch": 48.96,
|
|
"learning_rate": 1.4988425925925925e-05,
|
|
"loss": 0.0134,
|
|
"step": 2950
|
|
},
|
|
{
|
|
"epoch": 49.0,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.410630226135254,
|
|
"eval_runtime": 0.7027,
|
|
"eval_samples_per_second": 65.459,
|
|
"eval_steps_per_second": 4.269,
|
|
"step": 2952
|
|
},
|
|
{
|
|
"epoch": 49.13,
|
|
"learning_rate": 1.4907407407407406e-05,
|
|
"loss": 0.0366,
|
|
"step": 2960
|
|
},
|
|
{
|
|
"epoch": 49.29,
|
|
"learning_rate": 1.4826388888888887e-05,
|
|
"loss": 0.0219,
|
|
"step": 2970
|
|
},
|
|
{
|
|
"epoch": 49.46,
|
|
"learning_rate": 1.4745370370370368e-05,
|
|
"loss": 0.0092,
|
|
"step": 2980
|
|
},
|
|
{
|
|
"epoch": 49.63,
|
|
"learning_rate": 1.4664351851851849e-05,
|
|
"loss": 0.0295,
|
|
"step": 2990
|
|
},
|
|
{
|
|
"epoch": 49.79,
|
|
"learning_rate": 1.4583333333333333e-05,
|
|
"loss": 0.0183,
|
|
"step": 3000
|
|
},
|
|
{
|
|
"epoch": 49.96,
|
|
"learning_rate": 1.4502314814814814e-05,
|
|
"loss": 0.026,
|
|
"step": 3010
|
|
},
|
|
{
|
|
"epoch": 49.99,
|
|
"eval_accuracy": 0.7608695652173914,
|
|
"eval_loss": 1.2972004413604736,
|
|
"eval_runtime": 0.6842,
|
|
"eval_samples_per_second": 67.236,
|
|
"eval_steps_per_second": 4.385,
|
|
"step": 3012
|
|
},
|
|
{
|
|
"epoch": 50.12,
|
|
"learning_rate": 1.4421296296296295e-05,
|
|
"loss": 0.0207,
|
|
"step": 3020
|
|
},
|
|
{
|
|
"epoch": 50.29,
|
|
"learning_rate": 1.4340277777777776e-05,
|
|
"loss": 0.0111,
|
|
"step": 3030
|
|
},
|
|
{
|
|
"epoch": 50.46,
|
|
"learning_rate": 1.4259259259259257e-05,
|
|
"loss": 0.0364,
|
|
"step": 3040
|
|
},
|
|
{
|
|
"epoch": 50.62,
|
|
"learning_rate": 1.417824074074074e-05,
|
|
"loss": 0.0148,
|
|
"step": 3050
|
|
},
|
|
{
|
|
"epoch": 50.79,
|
|
"learning_rate": 1.409722222222222e-05,
|
|
"loss": 0.0271,
|
|
"step": 3060
|
|
},
|
|
{
|
|
"epoch": 50.95,
|
|
"learning_rate": 1.4016203703703703e-05,
|
|
"loss": 0.0507,
|
|
"step": 3070
|
|
},
|
|
{
|
|
"epoch": 50.99,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.3302582502365112,
|
|
"eval_runtime": 0.694,
|
|
"eval_samples_per_second": 66.283,
|
|
"eval_steps_per_second": 4.323,
|
|
"step": 3072
|
|
},
|
|
{
|
|
"epoch": 51.12,
|
|
"learning_rate": 1.3935185185185184e-05,
|
|
"loss": 0.0164,
|
|
"step": 3080
|
|
},
|
|
{
|
|
"epoch": 51.29,
|
|
"learning_rate": 1.3854166666666665e-05,
|
|
"loss": 0.0151,
|
|
"step": 3090
|
|
},
|
|
{
|
|
"epoch": 51.45,
|
|
"learning_rate": 1.3773148148148148e-05,
|
|
"loss": 0.0495,
|
|
"step": 3100
|
|
},
|
|
{
|
|
"epoch": 51.62,
|
|
"learning_rate": 1.3692129629629629e-05,
|
|
"loss": 0.0056,
|
|
"step": 3110
|
|
},
|
|
{
|
|
"epoch": 51.78,
|
|
"learning_rate": 1.361111111111111e-05,
|
|
"loss": 0.0323,
|
|
"step": 3120
|
|
},
|
|
{
|
|
"epoch": 51.95,
|
|
"learning_rate": 1.353009259259259e-05,
|
|
"loss": 0.0394,
|
|
"step": 3130
|
|
},
|
|
{
|
|
"epoch": 52.0,
|
|
"eval_accuracy": 0.8478260869565217,
|
|
"eval_loss": 1.1954048871994019,
|
|
"eval_runtime": 0.7081,
|
|
"eval_samples_per_second": 64.959,
|
|
"eval_steps_per_second": 4.236,
|
|
"step": 3133
|
|
},
|
|
{
|
|
"epoch": 52.12,
|
|
"learning_rate": 1.3449074074074072e-05,
|
|
"loss": 0.0266,
|
|
"step": 3140
|
|
},
|
|
{
|
|
"epoch": 52.28,
|
|
"learning_rate": 1.3368055555555554e-05,
|
|
"loss": 0.0354,
|
|
"step": 3150
|
|
},
|
|
{
|
|
"epoch": 52.45,
|
|
"learning_rate": 1.3287037037037037e-05,
|
|
"loss": 0.0168,
|
|
"step": 3160
|
|
},
|
|
{
|
|
"epoch": 52.61,
|
|
"learning_rate": 1.3206018518518518e-05,
|
|
"loss": 0.0259,
|
|
"step": 3170
|
|
},
|
|
{
|
|
"epoch": 52.78,
|
|
"learning_rate": 1.3124999999999999e-05,
|
|
"loss": 0.027,
|
|
"step": 3180
|
|
},
|
|
{
|
|
"epoch": 52.95,
|
|
"learning_rate": 1.304398148148148e-05,
|
|
"loss": 0.0271,
|
|
"step": 3190
|
|
},
|
|
{
|
|
"epoch": 53.0,
|
|
"eval_accuracy": 0.8260869565217391,
|
|
"eval_loss": 1.312525987625122,
|
|
"eval_runtime": 0.6832,
|
|
"eval_samples_per_second": 67.329,
|
|
"eval_steps_per_second": 4.391,
|
|
"step": 3193
|
|
},
|
|
{
|
|
"epoch": 53.11,
|
|
"learning_rate": 1.296296296296296e-05,
|
|
"loss": 0.0101,
|
|
"step": 3200
|
|
},
|
|
{
|
|
"epoch": 53.28,
|
|
"learning_rate": 1.2881944444444443e-05,
|
|
"loss": 0.0212,
|
|
"step": 3210
|
|
},
|
|
{
|
|
"epoch": 53.44,
|
|
"learning_rate": 1.2800925925925926e-05,
|
|
"loss": 0.0057,
|
|
"step": 3220
|
|
},
|
|
{
|
|
"epoch": 53.61,
|
|
"learning_rate": 1.2719907407407407e-05,
|
|
"loss": 0.0132,
|
|
"step": 3230
|
|
},
|
|
{
|
|
"epoch": 53.78,
|
|
"learning_rate": 1.2638888888888888e-05,
|
|
"loss": 0.0173,
|
|
"step": 3240
|
|
},
|
|
{
|
|
"epoch": 53.94,
|
|
"learning_rate": 1.2557870370370369e-05,
|
|
"loss": 0.0115,
|
|
"step": 3250
|
|
},
|
|
{
|
|
"epoch": 53.99,
|
|
"eval_accuracy": 0.8478260869565217,
|
|
"eval_loss": 1.3443913459777832,
|
|
"eval_runtime": 0.7167,
|
|
"eval_samples_per_second": 64.185,
|
|
"eval_steps_per_second": 4.186,
|
|
"step": 3253
|
|
},
|
|
{
|
|
"epoch": 54.11,
|
|
"learning_rate": 1.247685185185185e-05,
|
|
"loss": 0.0028,
|
|
"step": 3260
|
|
},
|
|
{
|
|
"epoch": 54.27,
|
|
"learning_rate": 1.2395833333333333e-05,
|
|
"loss": 0.0184,
|
|
"step": 3270
|
|
},
|
|
{
|
|
"epoch": 54.44,
|
|
"learning_rate": 1.2314814814814813e-05,
|
|
"loss": 0.0007,
|
|
"step": 3280
|
|
},
|
|
{
|
|
"epoch": 54.61,
|
|
"learning_rate": 1.2233796296296294e-05,
|
|
"loss": 0.0233,
|
|
"step": 3290
|
|
},
|
|
{
|
|
"epoch": 54.77,
|
|
"learning_rate": 1.2152777777777777e-05,
|
|
"loss": 0.0078,
|
|
"step": 3300
|
|
},
|
|
{
|
|
"epoch": 54.94,
|
|
"learning_rate": 1.2071759259259258e-05,
|
|
"loss": 0.0138,
|
|
"step": 3310
|
|
},
|
|
{
|
|
"epoch": 54.99,
|
|
"eval_accuracy": 0.8260869565217391,
|
|
"eval_loss": 1.4688717126846313,
|
|
"eval_runtime": 0.6497,
|
|
"eval_samples_per_second": 70.806,
|
|
"eval_steps_per_second": 4.618,
|
|
"step": 3313
|
|
},
|
|
{
|
|
"epoch": 55.1,
|
|
"learning_rate": 1.199074074074074e-05,
|
|
"loss": 0.0081,
|
|
"step": 3320
|
|
},
|
|
{
|
|
"epoch": 55.27,
|
|
"learning_rate": 1.1909722222222222e-05,
|
|
"loss": 0.0218,
|
|
"step": 3330
|
|
},
|
|
{
|
|
"epoch": 55.44,
|
|
"learning_rate": 1.1828703703703703e-05,
|
|
"loss": 0.0143,
|
|
"step": 3340
|
|
},
|
|
{
|
|
"epoch": 55.6,
|
|
"learning_rate": 1.1747685185185184e-05,
|
|
"loss": 0.0216,
|
|
"step": 3350
|
|
},
|
|
{
|
|
"epoch": 55.77,
|
|
"learning_rate": 1.1666666666666665e-05,
|
|
"loss": 0.0091,
|
|
"step": 3360
|
|
},
|
|
{
|
|
"epoch": 55.93,
|
|
"learning_rate": 1.1585648148148149e-05,
|
|
"loss": 0.0184,
|
|
"step": 3370
|
|
},
|
|
{
|
|
"epoch": 56.0,
|
|
"eval_accuracy": 0.8260869565217391,
|
|
"eval_loss": 1.4959295988082886,
|
|
"eval_runtime": 0.6842,
|
|
"eval_samples_per_second": 67.228,
|
|
"eval_steps_per_second": 4.384,
|
|
"step": 3374
|
|
},
|
|
{
|
|
"epoch": 56.1,
|
|
"learning_rate": 1.150462962962963e-05,
|
|
"loss": 0.0134,
|
|
"step": 3380
|
|
},
|
|
{
|
|
"epoch": 56.27,
|
|
"learning_rate": 1.142361111111111e-05,
|
|
"loss": 0.0193,
|
|
"step": 3390
|
|
},
|
|
{
|
|
"epoch": 56.43,
|
|
"learning_rate": 1.1342592592592592e-05,
|
|
"loss": 0.0049,
|
|
"step": 3400
|
|
},
|
|
{
|
|
"epoch": 56.6,
|
|
"learning_rate": 1.1261574074074073e-05,
|
|
"loss": 0.0105,
|
|
"step": 3410
|
|
},
|
|
{
|
|
"epoch": 56.76,
|
|
"learning_rate": 1.1180555555555554e-05,
|
|
"loss": 0.0245,
|
|
"step": 3420
|
|
},
|
|
{
|
|
"epoch": 56.93,
|
|
"learning_rate": 1.1099537037037036e-05,
|
|
"loss": 0.0163,
|
|
"step": 3430
|
|
},
|
|
{
|
|
"epoch": 57.0,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.349004864692688,
|
|
"eval_runtime": 0.6951,
|
|
"eval_samples_per_second": 66.174,
|
|
"eval_steps_per_second": 4.316,
|
|
"step": 3434
|
|
},
|
|
{
|
|
"epoch": 57.1,
|
|
"learning_rate": 1.1018518518518517e-05,
|
|
"loss": 0.0116,
|
|
"step": 3440
|
|
},
|
|
{
|
|
"epoch": 57.26,
|
|
"learning_rate": 1.0937499999999998e-05,
|
|
"loss": 0.0352,
|
|
"step": 3450
|
|
},
|
|
{
|
|
"epoch": 57.43,
|
|
"learning_rate": 1.085648148148148e-05,
|
|
"loss": 0.0316,
|
|
"step": 3460
|
|
},
|
|
{
|
|
"epoch": 57.59,
|
|
"learning_rate": 1.0775462962962962e-05,
|
|
"loss": 0.0291,
|
|
"step": 3470
|
|
},
|
|
{
|
|
"epoch": 57.76,
|
|
"learning_rate": 1.0694444444444444e-05,
|
|
"loss": 0.0388,
|
|
"step": 3480
|
|
},
|
|
{
|
|
"epoch": 57.93,
|
|
"learning_rate": 1.0613425925925925e-05,
|
|
"loss": 0.0112,
|
|
"step": 3490
|
|
},
|
|
{
|
|
"epoch": 57.99,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.4749095439910889,
|
|
"eval_runtime": 0.6781,
|
|
"eval_samples_per_second": 67.833,
|
|
"eval_steps_per_second": 4.424,
|
|
"step": 3494
|
|
},
|
|
{
|
|
"epoch": 58.09,
|
|
"learning_rate": 1.0532407407407406e-05,
|
|
"loss": 0.025,
|
|
"step": 3500
|
|
},
|
|
{
|
|
"epoch": 58.26,
|
|
"learning_rate": 1.0451388888888887e-05,
|
|
"loss": 0.0354,
|
|
"step": 3510
|
|
},
|
|
{
|
|
"epoch": 58.42,
|
|
"learning_rate": 1.0370370370370368e-05,
|
|
"loss": 0.0064,
|
|
"step": 3520
|
|
},
|
|
{
|
|
"epoch": 58.59,
|
|
"learning_rate": 1.028935185185185e-05,
|
|
"loss": 0.0058,
|
|
"step": 3530
|
|
},
|
|
{
|
|
"epoch": 58.76,
|
|
"learning_rate": 1.0208333333333334e-05,
|
|
"loss": 0.0241,
|
|
"step": 3540
|
|
},
|
|
{
|
|
"epoch": 58.92,
|
|
"learning_rate": 1.0127314814814815e-05,
|
|
"loss": 0.0185,
|
|
"step": 3550
|
|
},
|
|
{
|
|
"epoch": 58.99,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.5823291540145874,
|
|
"eval_runtime": 0.6695,
|
|
"eval_samples_per_second": 68.712,
|
|
"eval_steps_per_second": 4.481,
|
|
"step": 3554
|
|
},
|
|
{
|
|
"epoch": 59.09,
|
|
"learning_rate": 1.0046296296296295e-05,
|
|
"loss": 0.0089,
|
|
"step": 3560
|
|
},
|
|
{
|
|
"epoch": 59.25,
|
|
"learning_rate": 9.965277777777776e-06,
|
|
"loss": 0.0291,
|
|
"step": 3570
|
|
},
|
|
{
|
|
"epoch": 59.42,
|
|
"learning_rate": 9.884259259259257e-06,
|
|
"loss": 0.0118,
|
|
"step": 3580
|
|
},
|
|
{
|
|
"epoch": 59.59,
|
|
"learning_rate": 9.80324074074074e-06,
|
|
"loss": 0.0318,
|
|
"step": 3590
|
|
},
|
|
{
|
|
"epoch": 59.75,
|
|
"learning_rate": 9.722222222222221e-06,
|
|
"loss": 0.0395,
|
|
"step": 3600
|
|
},
|
|
{
|
|
"epoch": 59.92,
|
|
"learning_rate": 9.641203703703704e-06,
|
|
"loss": 0.031,
|
|
"step": 3610
|
|
},
|
|
{
|
|
"epoch": 60.0,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.5190129280090332,
|
|
"eval_runtime": 0.7102,
|
|
"eval_samples_per_second": 64.766,
|
|
"eval_steps_per_second": 4.224,
|
|
"step": 3615
|
|
},
|
|
{
|
|
"epoch": 60.08,
|
|
"learning_rate": 9.560185185185185e-06,
|
|
"loss": 0.0127,
|
|
"step": 3620
|
|
},
|
|
{
|
|
"epoch": 60.25,
|
|
"learning_rate": 9.479166666666666e-06,
|
|
"loss": 0.0065,
|
|
"step": 3630
|
|
},
|
|
{
|
|
"epoch": 60.41,
|
|
"learning_rate": 9.398148148148148e-06,
|
|
"loss": 0.0129,
|
|
"step": 3640
|
|
},
|
|
{
|
|
"epoch": 60.58,
|
|
"learning_rate": 9.31712962962963e-06,
|
|
"loss": 0.0157,
|
|
"step": 3650
|
|
},
|
|
{
|
|
"epoch": 60.75,
|
|
"learning_rate": 9.23611111111111e-06,
|
|
"loss": 0.0134,
|
|
"step": 3660
|
|
},
|
|
{
|
|
"epoch": 60.91,
|
|
"learning_rate": 9.155092592592591e-06,
|
|
"loss": 0.0161,
|
|
"step": 3670
|
|
},
|
|
{
|
|
"epoch": 61.0,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.5475845336914062,
|
|
"eval_runtime": 0.7113,
|
|
"eval_samples_per_second": 64.674,
|
|
"eval_steps_per_second": 4.218,
|
|
"step": 3675
|
|
},
|
|
{
|
|
"epoch": 61.08,
|
|
"learning_rate": 9.074074074074072e-06,
|
|
"loss": 0.0117,
|
|
"step": 3680
|
|
},
|
|
{
|
|
"epoch": 61.24,
|
|
"learning_rate": 8.993055555555555e-06,
|
|
"loss": 0.0135,
|
|
"step": 3690
|
|
},
|
|
{
|
|
"epoch": 61.41,
|
|
"learning_rate": 8.912037037037037e-06,
|
|
"loss": 0.0179,
|
|
"step": 3700
|
|
},
|
|
{
|
|
"epoch": 61.58,
|
|
"learning_rate": 8.831018518518518e-06,
|
|
"loss": 0.0253,
|
|
"step": 3710
|
|
},
|
|
{
|
|
"epoch": 61.74,
|
|
"learning_rate": 8.75e-06,
|
|
"loss": 0.0267,
|
|
"step": 3720
|
|
},
|
|
{
|
|
"epoch": 61.91,
|
|
"learning_rate": 8.66898148148148e-06,
|
|
"loss": 0.0146,
|
|
"step": 3730
|
|
},
|
|
{
|
|
"epoch": 61.99,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.3930317163467407,
|
|
"eval_runtime": 0.6749,
|
|
"eval_samples_per_second": 68.162,
|
|
"eval_steps_per_second": 4.445,
|
|
"step": 3735
|
|
},
|
|
{
|
|
"epoch": 62.07,
|
|
"learning_rate": 8.587962962962963e-06,
|
|
"loss": 0.0202,
|
|
"step": 3740
|
|
},
|
|
{
|
|
"epoch": 62.24,
|
|
"learning_rate": 8.506944444444444e-06,
|
|
"loss": 0.0162,
|
|
"step": 3750
|
|
},
|
|
{
|
|
"epoch": 62.41,
|
|
"learning_rate": 8.425925925925925e-06,
|
|
"loss": 0.0199,
|
|
"step": 3760
|
|
},
|
|
{
|
|
"epoch": 62.57,
|
|
"learning_rate": 8.344907407407407e-06,
|
|
"loss": 0.0135,
|
|
"step": 3770
|
|
},
|
|
{
|
|
"epoch": 62.74,
|
|
"learning_rate": 8.263888888888888e-06,
|
|
"loss": 0.0132,
|
|
"step": 3780
|
|
},
|
|
{
|
|
"epoch": 62.9,
|
|
"learning_rate": 8.18287037037037e-06,
|
|
"loss": 0.005,
|
|
"step": 3790
|
|
},
|
|
{
|
|
"epoch": 62.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.5454456806182861,
|
|
"eval_runtime": 0.7582,
|
|
"eval_samples_per_second": 60.67,
|
|
"eval_steps_per_second": 3.957,
|
|
"step": 3795
|
|
},
|
|
{
|
|
"epoch": 63.07,
|
|
"learning_rate": 8.101851851851852e-06,
|
|
"loss": 0.03,
|
|
"step": 3800
|
|
},
|
|
{
|
|
"epoch": 63.24,
|
|
"learning_rate": 8.020833333333333e-06,
|
|
"loss": 0.0326,
|
|
"step": 3810
|
|
},
|
|
{
|
|
"epoch": 63.4,
|
|
"learning_rate": 7.939814814814814e-06,
|
|
"loss": 0.0128,
|
|
"step": 3820
|
|
},
|
|
{
|
|
"epoch": 63.57,
|
|
"learning_rate": 7.858796296296295e-06,
|
|
"loss": 0.0108,
|
|
"step": 3830
|
|
},
|
|
{
|
|
"epoch": 63.73,
|
|
"learning_rate": 7.777777777777777e-06,
|
|
"loss": 0.0011,
|
|
"step": 3840
|
|
},
|
|
{
|
|
"epoch": 63.9,
|
|
"learning_rate": 7.696759259259258e-06,
|
|
"loss": 0.0093,
|
|
"step": 3850
|
|
},
|
|
{
|
|
"epoch": 64.0,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.5958888530731201,
|
|
"eval_runtime": 0.6768,
|
|
"eval_samples_per_second": 67.965,
|
|
"eval_steps_per_second": 4.433,
|
|
"step": 3856
|
|
},
|
|
{
|
|
"epoch": 64.07,
|
|
"learning_rate": 7.61574074074074e-06,
|
|
"loss": 0.0132,
|
|
"step": 3860
|
|
},
|
|
{
|
|
"epoch": 64.23,
|
|
"learning_rate": 7.534722222222222e-06,
|
|
"loss": 0.0085,
|
|
"step": 3870
|
|
},
|
|
{
|
|
"epoch": 64.4,
|
|
"learning_rate": 7.453703703703703e-06,
|
|
"loss": 0.0326,
|
|
"step": 3880
|
|
},
|
|
{
|
|
"epoch": 64.56,
|
|
"learning_rate": 7.372685185185184e-06,
|
|
"loss": 0.0295,
|
|
"step": 3890
|
|
},
|
|
{
|
|
"epoch": 64.73,
|
|
"learning_rate": 7.291666666666667e-06,
|
|
"loss": 0.0172,
|
|
"step": 3900
|
|
},
|
|
{
|
|
"epoch": 64.9,
|
|
"learning_rate": 7.2106481481481475e-06,
|
|
"loss": 0.0224,
|
|
"step": 3910
|
|
},
|
|
{
|
|
"epoch": 65.0,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.4553816318511963,
|
|
"eval_runtime": 0.7364,
|
|
"eval_samples_per_second": 62.467,
|
|
"eval_steps_per_second": 4.074,
|
|
"step": 3916
|
|
},
|
|
{
|
|
"epoch": 65.06,
|
|
"learning_rate": 7.1296296296296285e-06,
|
|
"loss": 0.0208,
|
|
"step": 3920
|
|
},
|
|
{
|
|
"epoch": 65.23,
|
|
"learning_rate": 7.04861111111111e-06,
|
|
"loss": 0.023,
|
|
"step": 3930
|
|
},
|
|
{
|
|
"epoch": 65.39,
|
|
"learning_rate": 6.967592592592592e-06,
|
|
"loss": 0.0205,
|
|
"step": 3940
|
|
},
|
|
{
|
|
"epoch": 65.56,
|
|
"learning_rate": 6.886574074074074e-06,
|
|
"loss": 0.0088,
|
|
"step": 3950
|
|
},
|
|
{
|
|
"epoch": 65.73,
|
|
"learning_rate": 6.805555555555555e-06,
|
|
"loss": 0.0223,
|
|
"step": 3960
|
|
},
|
|
{
|
|
"epoch": 65.89,
|
|
"learning_rate": 6.724537037037036e-06,
|
|
"loss": 0.0154,
|
|
"step": 3970
|
|
},
|
|
{
|
|
"epoch": 65.99,
|
|
"eval_accuracy": 0.8260869565217391,
|
|
"eval_loss": 1.5326697826385498,
|
|
"eval_runtime": 0.7162,
|
|
"eval_samples_per_second": 64.228,
|
|
"eval_steps_per_second": 4.189,
|
|
"step": 3976
|
|
},
|
|
{
|
|
"epoch": 66.06,
|
|
"learning_rate": 6.6435185185185185e-06,
|
|
"loss": 0.0093,
|
|
"step": 3980
|
|
},
|
|
{
|
|
"epoch": 66.22,
|
|
"learning_rate": 6.5624999999999994e-06,
|
|
"loss": 0.0138,
|
|
"step": 3990
|
|
},
|
|
{
|
|
"epoch": 66.39,
|
|
"learning_rate": 6.48148148148148e-06,
|
|
"loss": 0.0059,
|
|
"step": 4000
|
|
},
|
|
{
|
|
"epoch": 66.56,
|
|
"learning_rate": 6.400462962962963e-06,
|
|
"loss": 0.0033,
|
|
"step": 4010
|
|
},
|
|
{
|
|
"epoch": 66.72,
|
|
"learning_rate": 6.319444444444444e-06,
|
|
"loss": 0.0237,
|
|
"step": 4020
|
|
},
|
|
{
|
|
"epoch": 66.89,
|
|
"learning_rate": 6.238425925925925e-06,
|
|
"loss": 0.0116,
|
|
"step": 4030
|
|
},
|
|
{
|
|
"epoch": 66.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.6029945611953735,
|
|
"eval_runtime": 0.7141,
|
|
"eval_samples_per_second": 64.418,
|
|
"eval_steps_per_second": 4.201,
|
|
"step": 4036
|
|
},
|
|
{
|
|
"epoch": 67.05,
|
|
"learning_rate": 6.157407407407407e-06,
|
|
"loss": 0.0064,
|
|
"step": 4040
|
|
},
|
|
{
|
|
"epoch": 67.22,
|
|
"learning_rate": 6.0763888888888885e-06,
|
|
"loss": 0.0129,
|
|
"step": 4050
|
|
},
|
|
{
|
|
"epoch": 67.39,
|
|
"learning_rate": 5.99537037037037e-06,
|
|
"loss": 0.0032,
|
|
"step": 4060
|
|
},
|
|
{
|
|
"epoch": 67.55,
|
|
"learning_rate": 5.914351851851851e-06,
|
|
"loss": 0.0101,
|
|
"step": 4070
|
|
},
|
|
{
|
|
"epoch": 67.72,
|
|
"learning_rate": 5.833333333333332e-06,
|
|
"loss": 0.0051,
|
|
"step": 4080
|
|
},
|
|
{
|
|
"epoch": 67.88,
|
|
"learning_rate": 5.752314814814815e-06,
|
|
"loss": 0.0037,
|
|
"step": 4090
|
|
},
|
|
{
|
|
"epoch": 68.0,
|
|
"eval_accuracy": 0.8260869565217391,
|
|
"eval_loss": 1.5046288967132568,
|
|
"eval_runtime": 0.7027,
|
|
"eval_samples_per_second": 65.464,
|
|
"eval_steps_per_second": 4.269,
|
|
"step": 4097
|
|
},
|
|
{
|
|
"epoch": 68.05,
|
|
"learning_rate": 5.671296296296296e-06,
|
|
"loss": 0.0151,
|
|
"step": 4100
|
|
},
|
|
{
|
|
"epoch": 68.22,
|
|
"learning_rate": 5.590277777777777e-06,
|
|
"loss": 0.0007,
|
|
"step": 4110
|
|
},
|
|
{
|
|
"epoch": 68.38,
|
|
"learning_rate": 5.509259259259259e-06,
|
|
"loss": 0.0212,
|
|
"step": 4120
|
|
},
|
|
{
|
|
"epoch": 68.55,
|
|
"learning_rate": 5.42824074074074e-06,
|
|
"loss": 0.0038,
|
|
"step": 4130
|
|
},
|
|
{
|
|
"epoch": 68.71,
|
|
"learning_rate": 5.347222222222222e-06,
|
|
"loss": 0.0211,
|
|
"step": 4140
|
|
},
|
|
{
|
|
"epoch": 68.88,
|
|
"learning_rate": 5.266203703703703e-06,
|
|
"loss": 0.0023,
|
|
"step": 4150
|
|
},
|
|
{
|
|
"epoch": 69.0,
|
|
"eval_accuracy": 0.8260869565217391,
|
|
"eval_loss": 1.5222116708755493,
|
|
"eval_runtime": 0.6773,
|
|
"eval_samples_per_second": 67.922,
|
|
"eval_steps_per_second": 4.43,
|
|
"step": 4157
|
|
},
|
|
{
|
|
"epoch": 69.05,
|
|
"learning_rate": 5.185185185185184e-06,
|
|
"loss": 0.012,
|
|
"step": 4160
|
|
},
|
|
{
|
|
"epoch": 69.21,
|
|
"learning_rate": 5.104166666666667e-06,
|
|
"loss": 0.0086,
|
|
"step": 4170
|
|
},
|
|
{
|
|
"epoch": 69.38,
|
|
"learning_rate": 5.023148148148148e-06,
|
|
"loss": 0.0203,
|
|
"step": 4180
|
|
},
|
|
{
|
|
"epoch": 69.54,
|
|
"learning_rate": 4.942129629629629e-06,
|
|
"loss": 0.0074,
|
|
"step": 4190
|
|
},
|
|
{
|
|
"epoch": 69.71,
|
|
"learning_rate": 4.8611111111111105e-06,
|
|
"loss": 0.0071,
|
|
"step": 4200
|
|
},
|
|
{
|
|
"epoch": 69.88,
|
|
"learning_rate": 4.780092592592592e-06,
|
|
"loss": 0.0068,
|
|
"step": 4210
|
|
},
|
|
{
|
|
"epoch": 69.99,
|
|
"eval_accuracy": 0.8260869565217391,
|
|
"eval_loss": 1.4339091777801514,
|
|
"eval_runtime": 0.686,
|
|
"eval_samples_per_second": 67.057,
|
|
"eval_steps_per_second": 4.373,
|
|
"step": 4217
|
|
},
|
|
{
|
|
"epoch": 70.04,
|
|
"learning_rate": 4.699074074074074e-06,
|
|
"loss": 0.0015,
|
|
"step": 4220
|
|
},
|
|
{
|
|
"epoch": 70.21,
|
|
"learning_rate": 4.618055555555555e-06,
|
|
"loss": 0.0096,
|
|
"step": 4230
|
|
},
|
|
{
|
|
"epoch": 70.37,
|
|
"learning_rate": 4.537037037037036e-06,
|
|
"loss": 0.0074,
|
|
"step": 4240
|
|
},
|
|
{
|
|
"epoch": 70.54,
|
|
"learning_rate": 4.456018518518519e-06,
|
|
"loss": 0.0172,
|
|
"step": 4250
|
|
},
|
|
{
|
|
"epoch": 70.71,
|
|
"learning_rate": 4.375e-06,
|
|
"loss": 0.0024,
|
|
"step": 4260
|
|
},
|
|
{
|
|
"epoch": 70.87,
|
|
"learning_rate": 4.293981481481481e-06,
|
|
"loss": 0.0342,
|
|
"step": 4270
|
|
},
|
|
{
|
|
"epoch": 70.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.6964349746704102,
|
|
"eval_runtime": 0.7042,
|
|
"eval_samples_per_second": 65.326,
|
|
"eval_steps_per_second": 4.26,
|
|
"step": 4277
|
|
},
|
|
{
|
|
"epoch": 71.04,
|
|
"learning_rate": 4.212962962962962e-06,
|
|
"loss": 0.0028,
|
|
"step": 4280
|
|
},
|
|
{
|
|
"epoch": 71.2,
|
|
"learning_rate": 4.131944444444444e-06,
|
|
"loss": 0.0185,
|
|
"step": 4290
|
|
},
|
|
{
|
|
"epoch": 71.37,
|
|
"learning_rate": 4.050925925925926e-06,
|
|
"loss": 0.0172,
|
|
"step": 4300
|
|
},
|
|
{
|
|
"epoch": 71.54,
|
|
"learning_rate": 3.969907407407407e-06,
|
|
"loss": 0.0051,
|
|
"step": 4310
|
|
},
|
|
{
|
|
"epoch": 71.7,
|
|
"learning_rate": 3.888888888888889e-06,
|
|
"loss": 0.0049,
|
|
"step": 4320
|
|
},
|
|
{
|
|
"epoch": 71.87,
|
|
"learning_rate": 3.80787037037037e-06,
|
|
"loss": 0.0077,
|
|
"step": 4330
|
|
},
|
|
{
|
|
"epoch": 72.0,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.6102274656295776,
|
|
"eval_runtime": 0.691,
|
|
"eval_samples_per_second": 66.569,
|
|
"eval_steps_per_second": 4.341,
|
|
"step": 4338
|
|
},
|
|
{
|
|
"epoch": 72.03,
|
|
"learning_rate": 3.7268518518518515e-06,
|
|
"loss": 0.0207,
|
|
"step": 4340
|
|
},
|
|
{
|
|
"epoch": 72.2,
|
|
"learning_rate": 3.6458333333333333e-06,
|
|
"loss": 0.0135,
|
|
"step": 4350
|
|
},
|
|
{
|
|
"epoch": 72.37,
|
|
"learning_rate": 3.5648148148148143e-06,
|
|
"loss": 0.0098,
|
|
"step": 4360
|
|
},
|
|
{
|
|
"epoch": 72.53,
|
|
"learning_rate": 3.483796296296296e-06,
|
|
"loss": 0.0207,
|
|
"step": 4370
|
|
},
|
|
{
|
|
"epoch": 72.7,
|
|
"learning_rate": 3.4027777777777774e-06,
|
|
"loss": 0.0007,
|
|
"step": 4380
|
|
},
|
|
{
|
|
"epoch": 72.86,
|
|
"learning_rate": 3.3217592592592592e-06,
|
|
"loss": 0.0043,
|
|
"step": 4390
|
|
},
|
|
{
|
|
"epoch": 73.0,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.668748140335083,
|
|
"eval_runtime": 0.7135,
|
|
"eval_samples_per_second": 64.472,
|
|
"eval_steps_per_second": 4.205,
|
|
"step": 4398
|
|
},
|
|
{
|
|
"epoch": 73.03,
|
|
"learning_rate": 3.24074074074074e-06,
|
|
"loss": 0.0045,
|
|
"step": 4400
|
|
},
|
|
{
|
|
"epoch": 73.2,
|
|
"learning_rate": 3.159722222222222e-06,
|
|
"loss": 0.0121,
|
|
"step": 4410
|
|
},
|
|
{
|
|
"epoch": 73.36,
|
|
"learning_rate": 3.0787037037037034e-06,
|
|
"loss": 0.0131,
|
|
"step": 4420
|
|
},
|
|
{
|
|
"epoch": 73.53,
|
|
"learning_rate": 2.997685185185185e-06,
|
|
"loss": 0.0134,
|
|
"step": 4430
|
|
},
|
|
{
|
|
"epoch": 73.69,
|
|
"learning_rate": 2.916666666666666e-06,
|
|
"loss": 0.0233,
|
|
"step": 4440
|
|
},
|
|
{
|
|
"epoch": 73.86,
|
|
"learning_rate": 2.835648148148148e-06,
|
|
"loss": 0.0131,
|
|
"step": 4450
|
|
},
|
|
{
|
|
"epoch": 73.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.6847338676452637,
|
|
"eval_runtime": 0.702,
|
|
"eval_samples_per_second": 65.531,
|
|
"eval_steps_per_second": 4.274,
|
|
"step": 4458
|
|
},
|
|
{
|
|
"epoch": 74.02,
|
|
"learning_rate": 2.7546296296296293e-06,
|
|
"loss": 0.0151,
|
|
"step": 4460
|
|
},
|
|
{
|
|
"epoch": 74.19,
|
|
"learning_rate": 2.673611111111111e-06,
|
|
"loss": 0.0031,
|
|
"step": 4470
|
|
},
|
|
{
|
|
"epoch": 74.36,
|
|
"learning_rate": 2.592592592592592e-06,
|
|
"loss": 0.0135,
|
|
"step": 4480
|
|
},
|
|
{
|
|
"epoch": 74.52,
|
|
"learning_rate": 2.511574074074074e-06,
|
|
"loss": 0.0121,
|
|
"step": 4490
|
|
},
|
|
{
|
|
"epoch": 74.69,
|
|
"learning_rate": 2.4305555555555552e-06,
|
|
"loss": 0.0173,
|
|
"step": 4500
|
|
},
|
|
{
|
|
"epoch": 74.85,
|
|
"learning_rate": 2.349537037037037e-06,
|
|
"loss": 0.0031,
|
|
"step": 4510
|
|
},
|
|
{
|
|
"epoch": 74.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.7195295095443726,
|
|
"eval_runtime": 0.7118,
|
|
"eval_samples_per_second": 64.626,
|
|
"eval_steps_per_second": 4.215,
|
|
"step": 4518
|
|
},
|
|
{
|
|
"epoch": 75.02,
|
|
"learning_rate": 2.268518518518518e-06,
|
|
"loss": 0.0069,
|
|
"step": 4520
|
|
},
|
|
{
|
|
"epoch": 75.19,
|
|
"learning_rate": 2.1875e-06,
|
|
"loss": 0.0063,
|
|
"step": 4530
|
|
},
|
|
{
|
|
"epoch": 75.35,
|
|
"learning_rate": 2.106481481481481e-06,
|
|
"loss": 0.0288,
|
|
"step": 4540
|
|
},
|
|
{
|
|
"epoch": 75.52,
|
|
"learning_rate": 2.025462962962963e-06,
|
|
"loss": 0.0162,
|
|
"step": 4550
|
|
},
|
|
{
|
|
"epoch": 75.68,
|
|
"learning_rate": 1.9444444444444444e-06,
|
|
"loss": 0.0041,
|
|
"step": 4560
|
|
},
|
|
{
|
|
"epoch": 75.85,
|
|
"learning_rate": 1.8634259259259257e-06,
|
|
"loss": 0.0087,
|
|
"step": 4570
|
|
},
|
|
{
|
|
"epoch": 76.0,
|
|
"eval_accuracy": 0.782608695652174,
|
|
"eval_loss": 1.720942735671997,
|
|
"eval_runtime": 0.666,
|
|
"eval_samples_per_second": 69.067,
|
|
"eval_steps_per_second": 4.504,
|
|
"step": 4579
|
|
},
|
|
{
|
|
"epoch": 76.02,
|
|
"learning_rate": 1.7824074074074071e-06,
|
|
"loss": 0.0145,
|
|
"step": 4580
|
|
},
|
|
{
|
|
"epoch": 76.18,
|
|
"learning_rate": 1.7013888888888887e-06,
|
|
"loss": 0.0056,
|
|
"step": 4590
|
|
},
|
|
{
|
|
"epoch": 76.35,
|
|
"learning_rate": 1.62037037037037e-06,
|
|
"loss": 0.0108,
|
|
"step": 4600
|
|
},
|
|
{
|
|
"epoch": 76.51,
|
|
"learning_rate": 1.5393518518518517e-06,
|
|
"loss": 0.0152,
|
|
"step": 4610
|
|
},
|
|
{
|
|
"epoch": 76.68,
|
|
"learning_rate": 1.458333333333333e-06,
|
|
"loss": 0.0126,
|
|
"step": 4620
|
|
},
|
|
{
|
|
"epoch": 76.85,
|
|
"learning_rate": 1.3773148148148147e-06,
|
|
"loss": 0.0219,
|
|
"step": 4630
|
|
},
|
|
{
|
|
"epoch": 77.0,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.6714533567428589,
|
|
"eval_runtime": 0.7121,
|
|
"eval_samples_per_second": 64.594,
|
|
"eval_steps_per_second": 4.213,
|
|
"step": 4639
|
|
},
|
|
{
|
|
"epoch": 77.01,
|
|
"learning_rate": 1.296296296296296e-06,
|
|
"loss": 0.0078,
|
|
"step": 4640
|
|
},
|
|
{
|
|
"epoch": 77.18,
|
|
"learning_rate": 1.2152777777777776e-06,
|
|
"loss": 0.0186,
|
|
"step": 4650
|
|
},
|
|
{
|
|
"epoch": 77.34,
|
|
"learning_rate": 1.134259259259259e-06,
|
|
"loss": 0.0096,
|
|
"step": 4660
|
|
},
|
|
{
|
|
"epoch": 77.51,
|
|
"learning_rate": 1.0532407407407406e-06,
|
|
"loss": 0.0015,
|
|
"step": 4670
|
|
},
|
|
{
|
|
"epoch": 77.68,
|
|
"learning_rate": 9.722222222222222e-07,
|
|
"loss": 0.005,
|
|
"step": 4680
|
|
},
|
|
{
|
|
"epoch": 77.84,
|
|
"learning_rate": 8.912037037037036e-07,
|
|
"loss": 0.0229,
|
|
"step": 4690
|
|
},
|
|
{
|
|
"epoch": 77.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.6823025941848755,
|
|
"eval_runtime": 0.6989,
|
|
"eval_samples_per_second": 65.82,
|
|
"eval_steps_per_second": 4.293,
|
|
"step": 4699
|
|
},
|
|
{
|
|
"epoch": 78.01,
|
|
"learning_rate": 8.10185185185185e-07,
|
|
"loss": 0.0117,
|
|
"step": 4700
|
|
},
|
|
{
|
|
"epoch": 78.17,
|
|
"learning_rate": 7.291666666666665e-07,
|
|
"loss": 0.0014,
|
|
"step": 4710
|
|
},
|
|
{
|
|
"epoch": 78.34,
|
|
"learning_rate": 6.48148148148148e-07,
|
|
"loss": 0.0039,
|
|
"step": 4720
|
|
},
|
|
{
|
|
"epoch": 78.51,
|
|
"learning_rate": 5.671296296296295e-07,
|
|
"loss": 0.0111,
|
|
"step": 4730
|
|
},
|
|
{
|
|
"epoch": 78.67,
|
|
"learning_rate": 4.861111111111111e-07,
|
|
"loss": 0.0129,
|
|
"step": 4740
|
|
},
|
|
{
|
|
"epoch": 78.84,
|
|
"learning_rate": 4.050925925925925e-07,
|
|
"loss": 0.008,
|
|
"step": 4750
|
|
},
|
|
{
|
|
"epoch": 78.99,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.6750718355178833,
|
|
"eval_runtime": 0.6921,
|
|
"eval_samples_per_second": 66.467,
|
|
"eval_steps_per_second": 4.335,
|
|
"step": 4759
|
|
},
|
|
{
|
|
"epoch": 79.0,
|
|
"learning_rate": 3.24074074074074e-07,
|
|
"loss": 0.0032,
|
|
"step": 4760
|
|
},
|
|
{
|
|
"epoch": 79.17,
|
|
"learning_rate": 2.4305555555555555e-07,
|
|
"loss": 0.0079,
|
|
"step": 4770
|
|
},
|
|
{
|
|
"epoch": 79.34,
|
|
"learning_rate": 1.62037037037037e-07,
|
|
"loss": 0.008,
|
|
"step": 4780
|
|
},
|
|
{
|
|
"epoch": 79.5,
|
|
"learning_rate": 8.10185185185185e-08,
|
|
"loss": 0.0057,
|
|
"step": 4790
|
|
},
|
|
{
|
|
"epoch": 79.67,
|
|
"learning_rate": 0.0,
|
|
"loss": 0.0051,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 79.67,
|
|
"eval_accuracy": 0.8043478260869565,
|
|
"eval_loss": 1.675818681716919,
|
|
"eval_runtime": 0.6446,
|
|
"eval_samples_per_second": 71.357,
|
|
"eval_steps_per_second": 4.654,
|
|
"step": 4800
|
|
},
|
|
{
|
|
"epoch": 79.67,
|
|
"step": 4800,
|
|
"total_flos": 2.3777433087459287e+19,
|
|
"train_loss": 0.12199072747869649,
|
|
"train_runtime": 7091.3369,
|
|
"train_samples_per_second": 43.467,
|
|
"train_steps_per_second": 0.677
|
|
}
|
|
],
|
|
"logging_steps": 10,
|
|
"max_steps": 4800,
|
|
"num_input_tokens_seen": 0,
|
|
"num_train_epochs": 80,
|
|
"save_steps": 500,
|
|
"total_flos": 2.3777433087459287e+19,
|
|
"train_batch_size": 16,
|
|
"trial_name": null,
|
|
"trial_params": null
|
|
}
|
|
|