|
{ |
|
"best_metric": 0.8591549295774648, |
|
"best_model_checkpoint": "beit-base-patch16-224-65-fold3/checkpoint-97", |
|
"epoch": 92.3076923076923, |
|
"eval_steps": 500, |
|
"global_step": 300, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.9230769230769231, |
|
"eval_accuracy": 0.5211267605633803, |
|
"eval_loss": 0.8549354672431946, |
|
"eval_runtime": 1.1595, |
|
"eval_samples_per_second": 61.235, |
|
"eval_steps_per_second": 2.587, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 1.8461538461538463, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.697589099407196, |
|
"eval_runtime": 1.1208, |
|
"eval_samples_per_second": 63.345, |
|
"eval_steps_per_second": 2.677, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 2.769230769230769, |
|
"eval_accuracy": 0.5633802816901409, |
|
"eval_loss": 0.6808653473854065, |
|
"eval_runtime": 1.0746, |
|
"eval_samples_per_second": 66.072, |
|
"eval_steps_per_second": 2.792, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 3.076923076923077, |
|
"grad_norm": 5.197551727294922, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.7778, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 4.0, |
|
"eval_accuracy": 0.6056338028169014, |
|
"eval_loss": 0.645892322063446, |
|
"eval_runtime": 1.0976, |
|
"eval_samples_per_second": 64.689, |
|
"eval_steps_per_second": 2.733, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 4.923076923076923, |
|
"eval_accuracy": 0.6338028169014085, |
|
"eval_loss": 0.6352745294570923, |
|
"eval_runtime": 1.1067, |
|
"eval_samples_per_second": 64.154, |
|
"eval_steps_per_second": 2.711, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 5.846153846153846, |
|
"eval_accuracy": 0.6197183098591549, |
|
"eval_loss": 0.6140676736831665, |
|
"eval_runtime": 1.1233, |
|
"eval_samples_per_second": 63.206, |
|
"eval_steps_per_second": 2.671, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 6.153846153846154, |
|
"grad_norm": 8.811356544494629, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.6542, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 6.769230769230769, |
|
"eval_accuracy": 0.6056338028169014, |
|
"eval_loss": 0.6002594232559204, |
|
"eval_runtime": 1.0899, |
|
"eval_samples_per_second": 65.144, |
|
"eval_steps_per_second": 2.753, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 8.0, |
|
"eval_accuracy": 0.676056338028169, |
|
"eval_loss": 0.6167530417442322, |
|
"eval_runtime": 1.1246, |
|
"eval_samples_per_second": 63.133, |
|
"eval_steps_per_second": 2.668, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 8.923076923076923, |
|
"eval_accuracy": 0.6901408450704225, |
|
"eval_loss": 0.5781181454658508, |
|
"eval_runtime": 1.1021, |
|
"eval_samples_per_second": 64.421, |
|
"eval_steps_per_second": 2.722, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 9.23076923076923, |
|
"grad_norm": 11.113332748413086, |
|
"learning_rate": 5e-05, |
|
"loss": 0.5817, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 9.846153846153847, |
|
"eval_accuracy": 0.7323943661971831, |
|
"eval_loss": 0.5710453391075134, |
|
"eval_runtime": 1.1302, |
|
"eval_samples_per_second": 62.82, |
|
"eval_steps_per_second": 2.654, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 10.76923076923077, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.5344887971878052, |
|
"eval_runtime": 1.0983, |
|
"eval_samples_per_second": 64.645, |
|
"eval_steps_per_second": 2.731, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 12.0, |
|
"eval_accuracy": 0.647887323943662, |
|
"eval_loss": 0.6058281064033508, |
|
"eval_runtime": 1.0953, |
|
"eval_samples_per_second": 64.82, |
|
"eval_steps_per_second": 2.739, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 12.307692307692308, |
|
"grad_norm": 7.416208744049072, |
|
"learning_rate": 4.814814814814815e-05, |
|
"loss": 0.513, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 12.923076923076923, |
|
"eval_accuracy": 0.704225352112676, |
|
"eval_loss": 0.6432794332504272, |
|
"eval_runtime": 1.1166, |
|
"eval_samples_per_second": 63.586, |
|
"eval_steps_per_second": 2.687, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 13.846153846153847, |
|
"eval_accuracy": 0.704225352112676, |
|
"eval_loss": 0.5830168128013611, |
|
"eval_runtime": 1.0859, |
|
"eval_samples_per_second": 65.381, |
|
"eval_steps_per_second": 2.763, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 14.76923076923077, |
|
"eval_accuracy": 0.704225352112676, |
|
"eval_loss": 0.6167191863059998, |
|
"eval_runtime": 1.1037, |
|
"eval_samples_per_second": 64.331, |
|
"eval_steps_per_second": 2.718, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 15.384615384615385, |
|
"grad_norm": 4.344594955444336, |
|
"learning_rate": 4.62962962962963e-05, |
|
"loss": 0.4756, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 16.0, |
|
"eval_accuracy": 0.6338028169014085, |
|
"eval_loss": 0.7304334044456482, |
|
"eval_runtime": 1.1067, |
|
"eval_samples_per_second": 64.155, |
|
"eval_steps_per_second": 2.711, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 16.923076923076923, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.5485327839851379, |
|
"eval_runtime": 1.1071, |
|
"eval_samples_per_second": 64.132, |
|
"eval_steps_per_second": 2.71, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 17.846153846153847, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.5166436433792114, |
|
"eval_runtime": 1.1089, |
|
"eval_samples_per_second": 64.028, |
|
"eval_steps_per_second": 2.705, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 18.46153846153846, |
|
"grad_norm": 11.026878356933594, |
|
"learning_rate": 4.4444444444444447e-05, |
|
"loss": 0.4123, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 18.76923076923077, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.6266874670982361, |
|
"eval_runtime": 1.0972, |
|
"eval_samples_per_second": 64.709, |
|
"eval_steps_per_second": 2.734, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 20.0, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.42525675892829895, |
|
"eval_runtime": 1.125, |
|
"eval_samples_per_second": 63.111, |
|
"eval_steps_per_second": 2.667, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 20.923076923076923, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.4697897434234619, |
|
"eval_runtime": 1.069, |
|
"eval_samples_per_second": 66.415, |
|
"eval_steps_per_second": 2.806, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 21.53846153846154, |
|
"grad_norm": 6.852389335632324, |
|
"learning_rate": 4.259259259259259e-05, |
|
"loss": 0.3745, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 21.846153846153847, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.5312201976776123, |
|
"eval_runtime": 1.1106, |
|
"eval_samples_per_second": 63.927, |
|
"eval_steps_per_second": 2.701, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 22.76923076923077, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.5157667398452759, |
|
"eval_runtime": 1.0999, |
|
"eval_samples_per_second": 64.553, |
|
"eval_steps_per_second": 2.728, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 24.0, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.5968685746192932, |
|
"eval_runtime": 1.072, |
|
"eval_samples_per_second": 66.231, |
|
"eval_steps_per_second": 2.798, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 24.615384615384617, |
|
"grad_norm": 8.991398811340332, |
|
"learning_rate": 4.074074074074074e-05, |
|
"loss": 0.3751, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 24.923076923076923, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.541881799697876, |
|
"eval_runtime": 1.074, |
|
"eval_samples_per_second": 66.11, |
|
"eval_steps_per_second": 2.793, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 25.846153846153847, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.463002473115921, |
|
"eval_runtime": 1.09, |
|
"eval_samples_per_second": 65.141, |
|
"eval_steps_per_second": 2.752, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 26.76923076923077, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.5366793870925903, |
|
"eval_runtime": 1.0752, |
|
"eval_samples_per_second": 66.032, |
|
"eval_steps_per_second": 2.79, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 27.692307692307693, |
|
"grad_norm": 3.5171091556549072, |
|
"learning_rate": 3.888888888888889e-05, |
|
"loss": 0.3079, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 28.0, |
|
"eval_accuracy": 0.8309859154929577, |
|
"eval_loss": 0.5220130085945129, |
|
"eval_runtime": 1.0931, |
|
"eval_samples_per_second": 64.955, |
|
"eval_steps_per_second": 2.745, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 28.923076923076923, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.5341811180114746, |
|
"eval_runtime": 1.1125, |
|
"eval_samples_per_second": 63.819, |
|
"eval_steps_per_second": 2.697, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 29.846153846153847, |
|
"eval_accuracy": 0.8591549295774648, |
|
"eval_loss": 0.5711377859115601, |
|
"eval_runtime": 1.0957, |
|
"eval_samples_per_second": 64.797, |
|
"eval_steps_per_second": 2.738, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"grad_norm": 7.372060298919678, |
|
"learning_rate": 3.7037037037037037e-05, |
|
"loss": 0.2831, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 30.76923076923077, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.575693666934967, |
|
"eval_runtime": 1.0914, |
|
"eval_samples_per_second": 65.056, |
|
"eval_steps_per_second": 2.749, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 32.0, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.5199587345123291, |
|
"eval_runtime": 1.0974, |
|
"eval_samples_per_second": 64.696, |
|
"eval_steps_per_second": 2.734, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 32.92307692307692, |
|
"eval_accuracy": 0.8450704225352113, |
|
"eval_loss": 0.4495673179626465, |
|
"eval_runtime": 1.0764, |
|
"eval_samples_per_second": 65.963, |
|
"eval_steps_per_second": 2.787, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"grad_norm": 14.049703598022461, |
|
"learning_rate": 3.518518518518519e-05, |
|
"loss": 0.292, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 33.84615384615385, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.6480037569999695, |
|
"eval_runtime": 1.1161, |
|
"eval_samples_per_second": 63.613, |
|
"eval_steps_per_second": 2.688, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 34.76923076923077, |
|
"eval_accuracy": 0.7464788732394366, |
|
"eval_loss": 0.6956098079681396, |
|
"eval_runtime": 1.1028, |
|
"eval_samples_per_second": 64.382, |
|
"eval_steps_per_second": 2.72, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 36.0, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.5629093647003174, |
|
"eval_runtime": 1.0914, |
|
"eval_samples_per_second": 65.052, |
|
"eval_steps_per_second": 2.749, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"grad_norm": 6.377761363983154, |
|
"learning_rate": 3.3333333333333335e-05, |
|
"loss": 0.2712, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 36.92307692307692, |
|
"eval_accuracy": 0.6901408450704225, |
|
"eval_loss": 0.761414110660553, |
|
"eval_runtime": 1.1022, |
|
"eval_samples_per_second": 64.414, |
|
"eval_steps_per_second": 2.722, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 37.84615384615385, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.5624626278877258, |
|
"eval_runtime": 1.076, |
|
"eval_samples_per_second": 65.988, |
|
"eval_steps_per_second": 2.788, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 38.76923076923077, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.571108877658844, |
|
"eval_runtime": 1.098, |
|
"eval_samples_per_second": 64.66, |
|
"eval_steps_per_second": 2.732, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"grad_norm": 5.312506198883057, |
|
"learning_rate": 3.148148148148148e-05, |
|
"loss": 0.2447, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 40.0, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.5476051568984985, |
|
"eval_runtime": 1.0921, |
|
"eval_samples_per_second": 65.011, |
|
"eval_steps_per_second": 2.747, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 40.92307692307692, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.5354418754577637, |
|
"eval_runtime": 1.0697, |
|
"eval_samples_per_second": 66.374, |
|
"eval_steps_per_second": 2.805, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 41.84615384615385, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.5217384099960327, |
|
"eval_runtime": 1.1215, |
|
"eval_samples_per_second": 63.311, |
|
"eval_steps_per_second": 2.675, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 42.76923076923077, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.576663613319397, |
|
"eval_runtime": 1.1012, |
|
"eval_samples_per_second": 64.478, |
|
"eval_steps_per_second": 2.724, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 43.07692307692308, |
|
"grad_norm": 9.378477096557617, |
|
"learning_rate": 2.962962962962963e-05, |
|
"loss": 0.185, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 44.0, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.5605924129486084, |
|
"eval_runtime": 1.09, |
|
"eval_samples_per_second": 65.14, |
|
"eval_steps_per_second": 2.752, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 44.92307692307692, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.67192542552948, |
|
"eval_runtime": 1.103, |
|
"eval_samples_per_second": 64.369, |
|
"eval_steps_per_second": 2.72, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 45.84615384615385, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.6073798537254333, |
|
"eval_runtime": 1.071, |
|
"eval_samples_per_second": 66.295, |
|
"eval_steps_per_second": 2.801, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 46.15384615384615, |
|
"grad_norm": 11.159360885620117, |
|
"learning_rate": 2.777777777777778e-05, |
|
"loss": 0.1921, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 46.76923076923077, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.6350691318511963, |
|
"eval_runtime": 1.1244, |
|
"eval_samples_per_second": 63.143, |
|
"eval_steps_per_second": 2.668, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 48.0, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.5916067957878113, |
|
"eval_runtime": 1.081, |
|
"eval_samples_per_second": 65.681, |
|
"eval_steps_per_second": 2.775, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 48.92307692307692, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.6103379130363464, |
|
"eval_runtime": 1.0963, |
|
"eval_samples_per_second": 64.761, |
|
"eval_steps_per_second": 2.736, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 49.23076923076923, |
|
"grad_norm": 5.742426872253418, |
|
"learning_rate": 2.5925925925925925e-05, |
|
"loss": 0.1844, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 49.84615384615385, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.5758461952209473, |
|
"eval_runtime": 1.1062, |
|
"eval_samples_per_second": 64.182, |
|
"eval_steps_per_second": 2.712, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 50.76923076923077, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.5496791005134583, |
|
"eval_runtime": 1.106, |
|
"eval_samples_per_second": 64.196, |
|
"eval_steps_per_second": 2.713, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 52.0, |
|
"eval_accuracy": 0.8309859154929577, |
|
"eval_loss": 0.5377268195152283, |
|
"eval_runtime": 1.0974, |
|
"eval_samples_per_second": 64.698, |
|
"eval_steps_per_second": 2.734, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 52.30769230769231, |
|
"grad_norm": 5.0976691246032715, |
|
"learning_rate": 2.4074074074074074e-05, |
|
"loss": 0.17, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 52.92307692307692, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.6278994679450989, |
|
"eval_runtime": 1.0996, |
|
"eval_samples_per_second": 64.568, |
|
"eval_steps_per_second": 2.728, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 53.84615384615385, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.5825640559196472, |
|
"eval_runtime": 1.104, |
|
"eval_samples_per_second": 64.312, |
|
"eval_steps_per_second": 2.717, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 54.76923076923077, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.7172585725784302, |
|
"eval_runtime": 1.0805, |
|
"eval_samples_per_second": 65.708, |
|
"eval_steps_per_second": 2.776, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 55.38461538461539, |
|
"grad_norm": 5.814525604248047, |
|
"learning_rate": 2.2222222222222223e-05, |
|
"loss": 0.1724, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 56.0, |
|
"eval_accuracy": 0.8450704225352113, |
|
"eval_loss": 0.5339981913566589, |
|
"eval_runtime": 1.1095, |
|
"eval_samples_per_second": 63.99, |
|
"eval_steps_per_second": 2.704, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 56.92307692307692, |
|
"eval_accuracy": 0.8591549295774648, |
|
"eval_loss": 0.5527878403663635, |
|
"eval_runtime": 1.1322, |
|
"eval_samples_per_second": 62.71, |
|
"eval_steps_per_second": 2.65, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 57.84615384615385, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.6547489166259766, |
|
"eval_runtime": 1.1043, |
|
"eval_samples_per_second": 64.297, |
|
"eval_steps_per_second": 2.717, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 58.46153846153846, |
|
"grad_norm": 5.154381275177002, |
|
"learning_rate": 2.037037037037037e-05, |
|
"loss": 0.1734, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 58.76923076923077, |
|
"eval_accuracy": 0.8309859154929577, |
|
"eval_loss": 0.5986492037773132, |
|
"eval_runtime": 1.0821, |
|
"eval_samples_per_second": 65.612, |
|
"eval_steps_per_second": 2.772, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 60.0, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.6057180762290955, |
|
"eval_runtime": 1.1087, |
|
"eval_samples_per_second": 64.039, |
|
"eval_steps_per_second": 2.706, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 60.92307692307692, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.7183056473731995, |
|
"eval_runtime": 1.0617, |
|
"eval_samples_per_second": 66.874, |
|
"eval_steps_per_second": 2.826, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 61.53846153846154, |
|
"grad_norm": 4.651764392852783, |
|
"learning_rate": 1.8518518518518518e-05, |
|
"loss": 0.1582, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 61.84615384615385, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.5912399291992188, |
|
"eval_runtime": 1.0929, |
|
"eval_samples_per_second": 64.966, |
|
"eval_steps_per_second": 2.745, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 62.76923076923077, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.6002036333084106, |
|
"eval_runtime": 1.0987, |
|
"eval_samples_per_second": 64.622, |
|
"eval_steps_per_second": 2.73, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 64.0, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.7886073589324951, |
|
"eval_runtime": 1.0996, |
|
"eval_samples_per_second": 64.566, |
|
"eval_steps_per_second": 2.728, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 64.61538461538461, |
|
"grad_norm": 4.254072666168213, |
|
"learning_rate": 1.6666666666666667e-05, |
|
"loss": 0.1372, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 64.92307692307692, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.7019013166427612, |
|
"eval_runtime": 1.091, |
|
"eval_samples_per_second": 65.079, |
|
"eval_steps_per_second": 2.75, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 65.84615384615384, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.6460186243057251, |
|
"eval_runtime": 1.1036, |
|
"eval_samples_per_second": 64.335, |
|
"eval_steps_per_second": 2.718, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 66.76923076923077, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.6934826970100403, |
|
"eval_runtime": 1.0674, |
|
"eval_samples_per_second": 66.518, |
|
"eval_steps_per_second": 2.811, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 67.6923076923077, |
|
"grad_norm": 8.02208137512207, |
|
"learning_rate": 1.4814814814814815e-05, |
|
"loss": 0.153, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 68.0, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.8107859492301941, |
|
"eval_runtime": 1.1001, |
|
"eval_samples_per_second": 64.542, |
|
"eval_steps_per_second": 2.727, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 68.92307692307692, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.7539263963699341, |
|
"eval_runtime": 1.1222, |
|
"eval_samples_per_second": 63.268, |
|
"eval_steps_per_second": 2.673, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 69.84615384615384, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.7089611887931824, |
|
"eval_runtime": 1.1229, |
|
"eval_samples_per_second": 63.227, |
|
"eval_steps_per_second": 2.672, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 70.76923076923077, |
|
"grad_norm": 4.6820597648620605, |
|
"learning_rate": 1.2962962962962962e-05, |
|
"loss": 0.1512, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 70.76923076923077, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.7146645188331604, |
|
"eval_runtime": 1.1135, |
|
"eval_samples_per_second": 63.762, |
|
"eval_steps_per_second": 2.694, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 72.0, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.8680395483970642, |
|
"eval_runtime": 1.0821, |
|
"eval_samples_per_second": 65.611, |
|
"eval_steps_per_second": 2.772, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 72.92307692307692, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.8785440325737, |
|
"eval_runtime": 1.1317, |
|
"eval_samples_per_second": 62.738, |
|
"eval_steps_per_second": 2.651, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 73.84615384615384, |
|
"grad_norm": 5.328825950622559, |
|
"learning_rate": 1.1111111111111112e-05, |
|
"loss": 0.1381, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 73.84615384615384, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.7413442134857178, |
|
"eval_runtime": 1.1102, |
|
"eval_samples_per_second": 63.955, |
|
"eval_steps_per_second": 2.702, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 74.76923076923077, |
|
"eval_accuracy": 0.8169014084507042, |
|
"eval_loss": 0.7254679799079895, |
|
"eval_runtime": 1.0871, |
|
"eval_samples_per_second": 65.312, |
|
"eval_steps_per_second": 2.76, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 76.0, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.7123715877532959, |
|
"eval_runtime": 1.1015, |
|
"eval_samples_per_second": 64.455, |
|
"eval_steps_per_second": 2.723, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"grad_norm": 6.364928245544434, |
|
"learning_rate": 9.259259259259259e-06, |
|
"loss": 0.1432, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 76.92307692307692, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.734306275844574, |
|
"eval_runtime": 1.093, |
|
"eval_samples_per_second": 64.958, |
|
"eval_steps_per_second": 2.745, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 77.84615384615384, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.7403556704521179, |
|
"eval_runtime": 1.1105, |
|
"eval_samples_per_second": 63.937, |
|
"eval_steps_per_second": 2.702, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 78.76923076923077, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.6940977573394775, |
|
"eval_runtime": 1.08, |
|
"eval_samples_per_second": 65.743, |
|
"eval_steps_per_second": 2.778, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"grad_norm": 8.516518592834473, |
|
"learning_rate": 7.4074074074074075e-06, |
|
"loss": 0.1135, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 80.0, |
|
"eval_accuracy": 0.8309859154929577, |
|
"eval_loss": 0.6721236109733582, |
|
"eval_runtime": 1.102, |
|
"eval_samples_per_second": 64.43, |
|
"eval_steps_per_second": 2.722, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 80.92307692307692, |
|
"eval_accuracy": 0.8309859154929577, |
|
"eval_loss": 0.6691632866859436, |
|
"eval_runtime": 1.0893, |
|
"eval_samples_per_second": 65.178, |
|
"eval_steps_per_second": 2.754, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 81.84615384615384, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.6879910826683044, |
|
"eval_runtime": 1.0969, |
|
"eval_samples_per_second": 64.728, |
|
"eval_steps_per_second": 2.735, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 82.76923076923077, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.6856514811515808, |
|
"eval_runtime": 1.0765, |
|
"eval_samples_per_second": 65.955, |
|
"eval_steps_per_second": 2.787, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 83.07692307692308, |
|
"grad_norm": 4.385526657104492, |
|
"learning_rate": 5.555555555555556e-06, |
|
"loss": 0.1182, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 84.0, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.6849518418312073, |
|
"eval_runtime": 1.1279, |
|
"eval_samples_per_second": 62.95, |
|
"eval_steps_per_second": 2.66, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 84.92307692307692, |
|
"eval_accuracy": 0.7887323943661971, |
|
"eval_loss": 0.6815563440322876, |
|
"eval_runtime": 1.0923, |
|
"eval_samples_per_second": 64.998, |
|
"eval_steps_per_second": 2.746, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 85.84615384615384, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.7047742605209351, |
|
"eval_runtime": 1.1236, |
|
"eval_samples_per_second": 63.187, |
|
"eval_steps_per_second": 2.67, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 86.15384615384616, |
|
"grad_norm": 5.567570209503174, |
|
"learning_rate": 3.7037037037037037e-06, |
|
"loss": 0.1019, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 86.76923076923077, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.7803800702095032, |
|
"eval_runtime": 1.0996, |
|
"eval_samples_per_second": 64.567, |
|
"eval_steps_per_second": 2.728, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 88.0, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.8013417720794678, |
|
"eval_runtime": 1.1133, |
|
"eval_samples_per_second": 63.773, |
|
"eval_steps_per_second": 2.695, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 88.92307692307692, |
|
"eval_accuracy": 0.7605633802816901, |
|
"eval_loss": 0.7505632638931274, |
|
"eval_runtime": 1.1302, |
|
"eval_samples_per_second": 62.82, |
|
"eval_steps_per_second": 2.654, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 89.23076923076923, |
|
"grad_norm": 3.6284656524658203, |
|
"learning_rate": 1.8518518518518519e-06, |
|
"loss": 0.1163, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 89.84615384615384, |
|
"eval_accuracy": 0.7746478873239436, |
|
"eval_loss": 0.7047209143638611, |
|
"eval_runtime": 1.1154, |
|
"eval_samples_per_second": 63.653, |
|
"eval_steps_per_second": 2.69, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 90.76923076923077, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.6762815713882446, |
|
"eval_runtime": 1.1157, |
|
"eval_samples_per_second": 63.635, |
|
"eval_steps_per_second": 2.689, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 92.0, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.6605757474899292, |
|
"eval_runtime": 1.1108, |
|
"eval_samples_per_second": 63.918, |
|
"eval_steps_per_second": 2.701, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"grad_norm": 3.279923439025879, |
|
"learning_rate": 0.0, |
|
"loss": 0.1258, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"eval_accuracy": 0.8028169014084507, |
|
"eval_loss": 0.6592366695404053, |
|
"eval_runtime": 1.0928, |
|
"eval_samples_per_second": 64.968, |
|
"eval_steps_per_second": 2.745, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"step": 300, |
|
"total_flos": 2.839022453308834e+18, |
|
"train_loss": 0.26989306092262266, |
|
"train_runtime": 1732.8571, |
|
"train_samples_per_second": 22.91, |
|
"train_steps_per_second": 0.173 |
|
}, |
|
{ |
|
"epoch": 92.3076923076923, |
|
"eval_accuracy": 0.8591549295774648, |
|
"eval_loss": 0.5711377859115601, |
|
"eval_runtime": 1.0772, |
|
"eval_samples_per_second": 65.91, |
|
"eval_steps_per_second": 2.785, |
|
"step": 300 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 300, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 500, |
|
"total_flos": 2.839022453308834e+18, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|