|
{ |
|
"best_metric": 0.5319148936170213, |
|
"best_model_checkpoint": "MAE-CT-CPC-Dicotomized-v7-tricot/checkpoint-3840", |
|
"epoch": 98.00759493670886, |
|
"eval_steps": 500, |
|
"global_step": 7900, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0012658227848101266, |
|
"grad_norm": 3.648193836212158, |
|
"learning_rate": 1.2658227848101266e-07, |
|
"loss": 1.0945, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.002531645569620253, |
|
"grad_norm": 3.2326066493988037, |
|
"learning_rate": 2.5316455696202533e-07, |
|
"loss": 1.0964, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0037974683544303796, |
|
"grad_norm": 4.472383975982666, |
|
"learning_rate": 3.79746835443038e-07, |
|
"loss": 1.1085, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.005063291139240506, |
|
"grad_norm": 2.293823003768921, |
|
"learning_rate": 5.063291139240507e-07, |
|
"loss": 1.0936, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.006329113924050633, |
|
"grad_norm": 5.018352508544922, |
|
"learning_rate": 6.329113924050634e-07, |
|
"loss": 1.0804, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.007594936708860759, |
|
"grad_norm": 3.437401533126831, |
|
"learning_rate": 7.59493670886076e-07, |
|
"loss": 1.1052, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.008860759493670886, |
|
"grad_norm": 2.633840322494507, |
|
"learning_rate": 8.860759493670887e-07, |
|
"loss": 1.1041, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.010126582278481013, |
|
"grad_norm": 8.326930046081543, |
|
"learning_rate": 1.0126582278481013e-06, |
|
"loss": 1.0989, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.010126582278481013, |
|
"eval_accuracy": 0.3191489361702128, |
|
"eval_loss": 1.1030582189559937, |
|
"eval_runtime": 10.2981, |
|
"eval_samples_per_second": 4.564, |
|
"eval_steps_per_second": 1.165, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 1.0012658227848101, |
|
"grad_norm": 5.40225076675415, |
|
"learning_rate": 1.139240506329114e-06, |
|
"loss": 1.0679, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 1.0025316455696203, |
|
"grad_norm": 2.765038013458252, |
|
"learning_rate": 1.2658227848101267e-06, |
|
"loss": 1.1039, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 1.0037974683544304, |
|
"grad_norm": 4.477443695068359, |
|
"learning_rate": 1.3924050632911392e-06, |
|
"loss": 1.0935, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 1.0050632911392405, |
|
"grad_norm": 4.2104573249816895, |
|
"learning_rate": 1.518987341772152e-06, |
|
"loss": 1.0694, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 1.0063291139240507, |
|
"grad_norm": 3.7104029655456543, |
|
"learning_rate": 1.6455696202531647e-06, |
|
"loss": 1.1194, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 1.0075949367088608, |
|
"grad_norm": 6.592172622680664, |
|
"learning_rate": 1.7721518987341774e-06, |
|
"loss": 1.1171, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 1.008860759493671, |
|
"grad_norm": 5.555737018585205, |
|
"learning_rate": 1.8987341772151901e-06, |
|
"loss": 1.0965, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 1.010126582278481, |
|
"grad_norm": 6.766844749450684, |
|
"learning_rate": 2.0253164556962026e-06, |
|
"loss": 1.0889, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 1.010126582278481, |
|
"eval_accuracy": 0.3404255319148936, |
|
"eval_loss": 1.1057974100112915, |
|
"eval_runtime": 9.1496, |
|
"eval_samples_per_second": 5.137, |
|
"eval_steps_per_second": 1.312, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 2.00126582278481, |
|
"grad_norm": 5.130347728729248, |
|
"learning_rate": 2.1518987341772153e-06, |
|
"loss": 1.0725, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 2.0025316455696203, |
|
"grad_norm": 5.7678070068359375, |
|
"learning_rate": 2.278481012658228e-06, |
|
"loss": 1.0791, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 2.0037974683544304, |
|
"grad_norm": 6.685475826263428, |
|
"learning_rate": 2.4050632911392408e-06, |
|
"loss": 1.0906, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 2.0050632911392405, |
|
"grad_norm": 4.179187774658203, |
|
"learning_rate": 2.5316455696202535e-06, |
|
"loss": 1.0754, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 2.0063291139240507, |
|
"grad_norm": 7.93744421005249, |
|
"learning_rate": 2.6582278481012658e-06, |
|
"loss": 1.0612, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 2.007594936708861, |
|
"grad_norm": 5.547979354858398, |
|
"learning_rate": 2.7848101265822785e-06, |
|
"loss": 1.0703, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 2.008860759493671, |
|
"grad_norm": 6.917874813079834, |
|
"learning_rate": 2.9113924050632912e-06, |
|
"loss": 1.0648, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 2.010126582278481, |
|
"grad_norm": 14.24355697631836, |
|
"learning_rate": 3.037974683544304e-06, |
|
"loss": 1.0739, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 2.010126582278481, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 1.1232898235321045, |
|
"eval_runtime": 8.996, |
|
"eval_samples_per_second": 5.225, |
|
"eval_steps_per_second": 1.334, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 3.00126582278481, |
|
"grad_norm": 11.271843910217285, |
|
"learning_rate": 3.164556962025317e-06, |
|
"loss": 1.0305, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 3.0025316455696203, |
|
"grad_norm": 12.815051078796387, |
|
"learning_rate": 3.2911392405063294e-06, |
|
"loss": 1.0576, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 3.0037974683544304, |
|
"grad_norm": 19.665115356445312, |
|
"learning_rate": 3.417721518987342e-06, |
|
"loss": 0.9806, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 3.0050632911392405, |
|
"grad_norm": 11.805643081665039, |
|
"learning_rate": 3.544303797468355e-06, |
|
"loss": 1.0493, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 3.0063291139240507, |
|
"grad_norm": 17.121118545532227, |
|
"learning_rate": 3.6708860759493675e-06, |
|
"loss": 1.0728, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 3.007594936708861, |
|
"grad_norm": 18.008495330810547, |
|
"learning_rate": 3.7974683544303802e-06, |
|
"loss": 1.0749, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 3.008860759493671, |
|
"grad_norm": 7.478245735168457, |
|
"learning_rate": 3.924050632911393e-06, |
|
"loss": 1.0579, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 3.010126582278481, |
|
"grad_norm": 29.086742401123047, |
|
"learning_rate": 4.050632911392405e-06, |
|
"loss": 1.0036, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 3.010126582278481, |
|
"eval_accuracy": 0.2765957446808511, |
|
"eval_loss": 1.1595509052276611, |
|
"eval_runtime": 9.1259, |
|
"eval_samples_per_second": 5.15, |
|
"eval_steps_per_second": 1.315, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 4.00126582278481, |
|
"grad_norm": 14.256952285766602, |
|
"learning_rate": 4.177215189873418e-06, |
|
"loss": 1.0293, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 4.00253164556962, |
|
"grad_norm": 14.238683700561523, |
|
"learning_rate": 4.303797468354431e-06, |
|
"loss": 0.9938, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 4.00379746835443, |
|
"grad_norm": 15.750340461730957, |
|
"learning_rate": 4.430379746835443e-06, |
|
"loss": 1.0362, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 4.0050632911392405, |
|
"grad_norm": 24.7191219329834, |
|
"learning_rate": 4.556962025316456e-06, |
|
"loss": 0.9665, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 4.006329113924051, |
|
"grad_norm": 17.434118270874023, |
|
"learning_rate": 4.683544303797468e-06, |
|
"loss": 0.9927, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 4.007594936708861, |
|
"grad_norm": 5.921336650848389, |
|
"learning_rate": 4.8101265822784815e-06, |
|
"loss": 0.9862, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 4.008860759493671, |
|
"grad_norm": 6.861782550811768, |
|
"learning_rate": 4.936708860759495e-06, |
|
"loss": 1.1099, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 4.010126582278481, |
|
"grad_norm": 26.024229049682617, |
|
"learning_rate": 5.063291139240507e-06, |
|
"loss": 1.0706, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 4.010126582278481, |
|
"eval_accuracy": 0.2553191489361702, |
|
"eval_loss": 1.1730738878250122, |
|
"eval_runtime": 9.0971, |
|
"eval_samples_per_second": 5.166, |
|
"eval_steps_per_second": 1.319, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 5.00126582278481, |
|
"grad_norm": 13.2531099319458, |
|
"learning_rate": 5.189873417721519e-06, |
|
"loss": 0.8603, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 5.00253164556962, |
|
"grad_norm": 11.527708053588867, |
|
"learning_rate": 5.3164556962025316e-06, |
|
"loss": 0.9762, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 5.00379746835443, |
|
"grad_norm": 25.327789306640625, |
|
"learning_rate": 5.443037974683545e-06, |
|
"loss": 0.9512, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 5.0050632911392405, |
|
"grad_norm": 24.11504554748535, |
|
"learning_rate": 5.569620253164557e-06, |
|
"loss": 0.9437, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 5.006329113924051, |
|
"grad_norm": 26.003135681152344, |
|
"learning_rate": 5.69620253164557e-06, |
|
"loss": 0.9311, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 5.007594936708861, |
|
"grad_norm": 22.07634735107422, |
|
"learning_rate": 5.8227848101265824e-06, |
|
"loss": 0.9741, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 5.008860759493671, |
|
"grad_norm": 19.476099014282227, |
|
"learning_rate": 5.949367088607595e-06, |
|
"loss": 0.9916, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 5.010126582278481, |
|
"grad_norm": 24.048255920410156, |
|
"learning_rate": 6.075949367088608e-06, |
|
"loss": 0.9669, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 5.010126582278481, |
|
"eval_accuracy": 0.3191489361702128, |
|
"eval_loss": 1.1227548122406006, |
|
"eval_runtime": 9.13, |
|
"eval_samples_per_second": 5.148, |
|
"eval_steps_per_second": 1.314, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 6.00126582278481, |
|
"grad_norm": 21.775312423706055, |
|
"learning_rate": 6.20253164556962e-06, |
|
"loss": 0.8955, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 6.00253164556962, |
|
"grad_norm": 10.735696792602539, |
|
"learning_rate": 6.329113924050634e-06, |
|
"loss": 0.9152, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 6.00379746835443, |
|
"grad_norm": 29.428773880004883, |
|
"learning_rate": 6.4556962025316464e-06, |
|
"loss": 0.9614, |
|
"step": 510 |
|
}, |
|
{ |
|
"epoch": 6.0050632911392405, |
|
"grad_norm": 21.473602294921875, |
|
"learning_rate": 6.582278481012659e-06, |
|
"loss": 0.9911, |
|
"step": 520 |
|
}, |
|
{ |
|
"epoch": 6.006329113924051, |
|
"grad_norm": 22.8590087890625, |
|
"learning_rate": 6.708860759493672e-06, |
|
"loss": 0.9406, |
|
"step": 530 |
|
}, |
|
{ |
|
"epoch": 6.007594936708861, |
|
"grad_norm": 25.129230499267578, |
|
"learning_rate": 6.835443037974684e-06, |
|
"loss": 0.9051, |
|
"step": 540 |
|
}, |
|
{ |
|
"epoch": 6.008860759493671, |
|
"grad_norm": 34.37338638305664, |
|
"learning_rate": 6.962025316455697e-06, |
|
"loss": 0.8215, |
|
"step": 550 |
|
}, |
|
{ |
|
"epoch": 6.010126582278481, |
|
"grad_norm": 33.80929946899414, |
|
"learning_rate": 7.08860759493671e-06, |
|
"loss": 1.0233, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 6.010126582278481, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 1.1490142345428467, |
|
"eval_runtime": 9.1478, |
|
"eval_samples_per_second": 5.138, |
|
"eval_steps_per_second": 1.312, |
|
"step": 560 |
|
}, |
|
{ |
|
"epoch": 7.00126582278481, |
|
"grad_norm": 10.804941177368164, |
|
"learning_rate": 7.215189873417722e-06, |
|
"loss": 0.8237, |
|
"step": 570 |
|
}, |
|
{ |
|
"epoch": 7.00253164556962, |
|
"grad_norm": 14.405462265014648, |
|
"learning_rate": 7.341772151898735e-06, |
|
"loss": 0.8341, |
|
"step": 580 |
|
}, |
|
{ |
|
"epoch": 7.00379746835443, |
|
"grad_norm": 33.400726318359375, |
|
"learning_rate": 7.468354430379747e-06, |
|
"loss": 0.8029, |
|
"step": 590 |
|
}, |
|
{ |
|
"epoch": 7.0050632911392405, |
|
"grad_norm": 11.047707557678223, |
|
"learning_rate": 7.5949367088607605e-06, |
|
"loss": 0.935, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 7.006329113924051, |
|
"grad_norm": 30.89590072631836, |
|
"learning_rate": 7.721518987341773e-06, |
|
"loss": 0.901, |
|
"step": 610 |
|
}, |
|
{ |
|
"epoch": 7.007594936708861, |
|
"grad_norm": 14.323598861694336, |
|
"learning_rate": 7.848101265822786e-06, |
|
"loss": 0.8399, |
|
"step": 620 |
|
}, |
|
{ |
|
"epoch": 7.008860759493671, |
|
"grad_norm": 25.75128173828125, |
|
"learning_rate": 7.974683544303799e-06, |
|
"loss": 0.8836, |
|
"step": 630 |
|
}, |
|
{ |
|
"epoch": 7.010126582278481, |
|
"grad_norm": 28.034568786621094, |
|
"learning_rate": 8.10126582278481e-06, |
|
"loss": 0.8492, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 7.010126582278481, |
|
"eval_accuracy": 0.3829787234042553, |
|
"eval_loss": 1.263619303703308, |
|
"eval_runtime": 8.888, |
|
"eval_samples_per_second": 5.288, |
|
"eval_steps_per_second": 1.35, |
|
"step": 640 |
|
}, |
|
{ |
|
"epoch": 8.00126582278481, |
|
"grad_norm": 27.309749603271484, |
|
"learning_rate": 8.227848101265824e-06, |
|
"loss": 0.7993, |
|
"step": 650 |
|
}, |
|
{ |
|
"epoch": 8.00253164556962, |
|
"grad_norm": 23.6923770904541, |
|
"learning_rate": 8.354430379746837e-06, |
|
"loss": 0.8733, |
|
"step": 660 |
|
}, |
|
{ |
|
"epoch": 8.00379746835443, |
|
"grad_norm": 27.559850692749023, |
|
"learning_rate": 8.481012658227848e-06, |
|
"loss": 0.8727, |
|
"step": 670 |
|
}, |
|
{ |
|
"epoch": 8.00506329113924, |
|
"grad_norm": 13.756896018981934, |
|
"learning_rate": 8.607594936708861e-06, |
|
"loss": 0.7896, |
|
"step": 680 |
|
}, |
|
{ |
|
"epoch": 8.00632911392405, |
|
"grad_norm": 17.663959503173828, |
|
"learning_rate": 8.734177215189874e-06, |
|
"loss": 0.718, |
|
"step": 690 |
|
}, |
|
{ |
|
"epoch": 8.00759493670886, |
|
"grad_norm": 11.68373966217041, |
|
"learning_rate": 8.860759493670886e-06, |
|
"loss": 0.6608, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 8.00886075949367, |
|
"grad_norm": 15.120232582092285, |
|
"learning_rate": 8.987341772151899e-06, |
|
"loss": 0.7421, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 8.010126582278481, |
|
"grad_norm": 12.948484420776367, |
|
"learning_rate": 9.113924050632912e-06, |
|
"loss": 0.8842, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 8.010126582278481, |
|
"eval_accuracy": 0.3617021276595745, |
|
"eval_loss": 1.4060986042022705, |
|
"eval_runtime": 8.8573, |
|
"eval_samples_per_second": 5.306, |
|
"eval_steps_per_second": 1.355, |
|
"step": 720 |
|
}, |
|
{ |
|
"epoch": 9.00126582278481, |
|
"grad_norm": 17.29895782470703, |
|
"learning_rate": 9.240506329113925e-06, |
|
"loss": 0.7192, |
|
"step": 730 |
|
}, |
|
{ |
|
"epoch": 9.00253164556962, |
|
"grad_norm": 16.932331085205078, |
|
"learning_rate": 9.367088607594937e-06, |
|
"loss": 0.8571, |
|
"step": 740 |
|
}, |
|
{ |
|
"epoch": 9.00379746835443, |
|
"grad_norm": 27.8249454498291, |
|
"learning_rate": 9.49367088607595e-06, |
|
"loss": 0.6975, |
|
"step": 750 |
|
}, |
|
{ |
|
"epoch": 9.00506329113924, |
|
"grad_norm": 19.709556579589844, |
|
"learning_rate": 9.620253164556963e-06, |
|
"loss": 0.7901, |
|
"step": 760 |
|
}, |
|
{ |
|
"epoch": 9.00632911392405, |
|
"grad_norm": 27.908536911010742, |
|
"learning_rate": 9.746835443037975e-06, |
|
"loss": 0.7778, |
|
"step": 770 |
|
}, |
|
{ |
|
"epoch": 9.00759493670886, |
|
"grad_norm": 11.295394897460938, |
|
"learning_rate": 9.87341772151899e-06, |
|
"loss": 0.6872, |
|
"step": 780 |
|
}, |
|
{ |
|
"epoch": 9.00886075949367, |
|
"grad_norm": 19.349098205566406, |
|
"learning_rate": 1e-05, |
|
"loss": 0.7879, |
|
"step": 790 |
|
}, |
|
{ |
|
"epoch": 9.010126582278481, |
|
"grad_norm": 17.75351333618164, |
|
"learning_rate": 9.985935302391e-06, |
|
"loss": 0.6599, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 9.010126582278481, |
|
"eval_accuracy": 0.2978723404255319, |
|
"eval_loss": 1.3445005416870117, |
|
"eval_runtime": 8.6196, |
|
"eval_samples_per_second": 5.453, |
|
"eval_steps_per_second": 1.392, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 10.00126582278481, |
|
"grad_norm": 21.275543212890625, |
|
"learning_rate": 9.971870604781998e-06, |
|
"loss": 0.6019, |
|
"step": 810 |
|
}, |
|
{ |
|
"epoch": 10.00253164556962, |
|
"grad_norm": 29.977495193481445, |
|
"learning_rate": 9.957805907172996e-06, |
|
"loss": 0.724, |
|
"step": 820 |
|
}, |
|
{ |
|
"epoch": 10.00379746835443, |
|
"grad_norm": 33.56300354003906, |
|
"learning_rate": 9.943741209563994e-06, |
|
"loss": 0.6457, |
|
"step": 830 |
|
}, |
|
{ |
|
"epoch": 10.00506329113924, |
|
"grad_norm": 38.13019943237305, |
|
"learning_rate": 9.929676511954994e-06, |
|
"loss": 0.6331, |
|
"step": 840 |
|
}, |
|
{ |
|
"epoch": 10.00632911392405, |
|
"grad_norm": 42.173423767089844, |
|
"learning_rate": 9.915611814345992e-06, |
|
"loss": 0.5996, |
|
"step": 850 |
|
}, |
|
{ |
|
"epoch": 10.00759493670886, |
|
"grad_norm": 11.129090309143066, |
|
"learning_rate": 9.901547116736992e-06, |
|
"loss": 0.615, |
|
"step": 860 |
|
}, |
|
{ |
|
"epoch": 10.00886075949367, |
|
"grad_norm": 37.42063903808594, |
|
"learning_rate": 9.88748241912799e-06, |
|
"loss": 0.6022, |
|
"step": 870 |
|
}, |
|
{ |
|
"epoch": 10.010126582278481, |
|
"grad_norm": 55.16875457763672, |
|
"learning_rate": 9.87341772151899e-06, |
|
"loss": 0.6723, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 10.010126582278481, |
|
"eval_accuracy": 0.3617021276595745, |
|
"eval_loss": 1.4071933031082153, |
|
"eval_runtime": 8.6355, |
|
"eval_samples_per_second": 5.443, |
|
"eval_steps_per_second": 1.39, |
|
"step": 880 |
|
}, |
|
{ |
|
"epoch": 11.00126582278481, |
|
"grad_norm": 14.047639846801758, |
|
"learning_rate": 9.859353023909987e-06, |
|
"loss": 0.5122, |
|
"step": 890 |
|
}, |
|
{ |
|
"epoch": 11.00253164556962, |
|
"grad_norm": 14.567192077636719, |
|
"learning_rate": 9.845288326300985e-06, |
|
"loss": 0.5763, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 11.00379746835443, |
|
"grad_norm": 31.18760871887207, |
|
"learning_rate": 9.831223628691983e-06, |
|
"loss": 0.6611, |
|
"step": 910 |
|
}, |
|
{ |
|
"epoch": 11.00506329113924, |
|
"grad_norm": 49.245513916015625, |
|
"learning_rate": 9.817158931082983e-06, |
|
"loss": 0.7129, |
|
"step": 920 |
|
}, |
|
{ |
|
"epoch": 11.00632911392405, |
|
"grad_norm": 25.506393432617188, |
|
"learning_rate": 9.803094233473981e-06, |
|
"loss": 0.4678, |
|
"step": 930 |
|
}, |
|
{ |
|
"epoch": 11.00759493670886, |
|
"grad_norm": 16.567678451538086, |
|
"learning_rate": 9.78902953586498e-06, |
|
"loss": 0.6464, |
|
"step": 940 |
|
}, |
|
{ |
|
"epoch": 11.00886075949367, |
|
"grad_norm": 45.41640090942383, |
|
"learning_rate": 9.774964838255979e-06, |
|
"loss": 0.6556, |
|
"step": 950 |
|
}, |
|
{ |
|
"epoch": 11.010126582278481, |
|
"grad_norm": 53.20558166503906, |
|
"learning_rate": 9.760900140646977e-06, |
|
"loss": 0.604, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 11.010126582278481, |
|
"eval_accuracy": 0.3617021276595745, |
|
"eval_loss": 1.4198564291000366, |
|
"eval_runtime": 8.4854, |
|
"eval_samples_per_second": 5.539, |
|
"eval_steps_per_second": 1.414, |
|
"step": 960 |
|
}, |
|
{ |
|
"epoch": 12.00126582278481, |
|
"grad_norm": 13.426566123962402, |
|
"learning_rate": 9.746835443037975e-06, |
|
"loss": 0.3598, |
|
"step": 970 |
|
}, |
|
{ |
|
"epoch": 12.00253164556962, |
|
"grad_norm": 48.93745422363281, |
|
"learning_rate": 9.732770745428974e-06, |
|
"loss": 0.5107, |
|
"step": 980 |
|
}, |
|
{ |
|
"epoch": 12.00379746835443, |
|
"grad_norm": 33.23870849609375, |
|
"learning_rate": 9.718706047819972e-06, |
|
"loss": 0.629, |
|
"step": 990 |
|
}, |
|
{ |
|
"epoch": 12.00506329113924, |
|
"grad_norm": 75.58332061767578, |
|
"learning_rate": 9.704641350210972e-06, |
|
"loss": 0.4616, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 12.00632911392405, |
|
"grad_norm": 29.726964950561523, |
|
"learning_rate": 9.69057665260197e-06, |
|
"loss": 0.6597, |
|
"step": 1010 |
|
}, |
|
{ |
|
"epoch": 12.00759493670886, |
|
"grad_norm": 41.4447135925293, |
|
"learning_rate": 9.67651195499297e-06, |
|
"loss": 0.67, |
|
"step": 1020 |
|
}, |
|
{ |
|
"epoch": 12.00886075949367, |
|
"grad_norm": 59.76002502441406, |
|
"learning_rate": 9.662447257383967e-06, |
|
"loss": 0.6902, |
|
"step": 1030 |
|
}, |
|
{ |
|
"epoch": 12.010126582278481, |
|
"grad_norm": 25.5214786529541, |
|
"learning_rate": 9.648382559774965e-06, |
|
"loss": 0.4959, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 12.010126582278481, |
|
"eval_accuracy": 0.3617021276595745, |
|
"eval_loss": 1.5688742399215698, |
|
"eval_runtime": 8.4949, |
|
"eval_samples_per_second": 5.533, |
|
"eval_steps_per_second": 1.413, |
|
"step": 1040 |
|
}, |
|
{ |
|
"epoch": 13.00126582278481, |
|
"grad_norm": 17.342782974243164, |
|
"learning_rate": 9.634317862165963e-06, |
|
"loss": 0.4443, |
|
"step": 1050 |
|
}, |
|
{ |
|
"epoch": 13.00253164556962, |
|
"grad_norm": 6.651524066925049, |
|
"learning_rate": 9.620253164556963e-06, |
|
"loss": 0.4626, |
|
"step": 1060 |
|
}, |
|
{ |
|
"epoch": 13.00379746835443, |
|
"grad_norm": 59.05470275878906, |
|
"learning_rate": 9.606188466947961e-06, |
|
"loss": 0.5051, |
|
"step": 1070 |
|
}, |
|
{ |
|
"epoch": 13.00506329113924, |
|
"grad_norm": 12.133808135986328, |
|
"learning_rate": 9.59212376933896e-06, |
|
"loss": 0.4063, |
|
"step": 1080 |
|
}, |
|
{ |
|
"epoch": 13.00632911392405, |
|
"grad_norm": 5.521517753601074, |
|
"learning_rate": 9.578059071729959e-06, |
|
"loss": 0.3626, |
|
"step": 1090 |
|
}, |
|
{ |
|
"epoch": 13.00759493670886, |
|
"grad_norm": 39.51848220825195, |
|
"learning_rate": 9.563994374120957e-06, |
|
"loss": 0.4715, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 13.00886075949367, |
|
"grad_norm": 17.837867736816406, |
|
"learning_rate": 9.549929676511955e-06, |
|
"loss": 0.4161, |
|
"step": 1110 |
|
}, |
|
{ |
|
"epoch": 13.010126582278481, |
|
"grad_norm": 10.324262619018555, |
|
"learning_rate": 9.535864978902954e-06, |
|
"loss": 0.3758, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 13.010126582278481, |
|
"eval_accuracy": 0.3617021276595745, |
|
"eval_loss": 1.7867138385772705, |
|
"eval_runtime": 8.632, |
|
"eval_samples_per_second": 5.445, |
|
"eval_steps_per_second": 1.39, |
|
"step": 1120 |
|
}, |
|
{ |
|
"epoch": 14.00126582278481, |
|
"grad_norm": 3.8076212406158447, |
|
"learning_rate": 9.521800281293952e-06, |
|
"loss": 0.2181, |
|
"step": 1130 |
|
}, |
|
{ |
|
"epoch": 14.00253164556962, |
|
"grad_norm": 7.90512752532959, |
|
"learning_rate": 9.507735583684952e-06, |
|
"loss": 0.4037, |
|
"step": 1140 |
|
}, |
|
{ |
|
"epoch": 14.00379746835443, |
|
"grad_norm": 6.371408462524414, |
|
"learning_rate": 9.49367088607595e-06, |
|
"loss": 0.7414, |
|
"step": 1150 |
|
}, |
|
{ |
|
"epoch": 14.00506329113924, |
|
"grad_norm": 21.530675888061523, |
|
"learning_rate": 9.47960618846695e-06, |
|
"loss": 0.2786, |
|
"step": 1160 |
|
}, |
|
{ |
|
"epoch": 14.00632911392405, |
|
"grad_norm": 1.7298585176467896, |
|
"learning_rate": 9.465541490857948e-06, |
|
"loss": 0.2941, |
|
"step": 1170 |
|
}, |
|
{ |
|
"epoch": 14.00759493670886, |
|
"grad_norm": 14.179819107055664, |
|
"learning_rate": 9.451476793248946e-06, |
|
"loss": 0.6105, |
|
"step": 1180 |
|
}, |
|
{ |
|
"epoch": 14.00886075949367, |
|
"grad_norm": 35.71600341796875, |
|
"learning_rate": 9.437412095639944e-06, |
|
"loss": 0.4703, |
|
"step": 1190 |
|
}, |
|
{ |
|
"epoch": 14.010126582278481, |
|
"grad_norm": 7.779309272766113, |
|
"learning_rate": 9.423347398030943e-06, |
|
"loss": 0.6257, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 14.010126582278481, |
|
"eval_accuracy": 0.3617021276595745, |
|
"eval_loss": 1.921836018562317, |
|
"eval_runtime": 8.7081, |
|
"eval_samples_per_second": 5.397, |
|
"eval_steps_per_second": 1.378, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 15.00126582278481, |
|
"grad_norm": 90.20023345947266, |
|
"learning_rate": 9.409282700421943e-06, |
|
"loss": 0.3217, |
|
"step": 1210 |
|
}, |
|
{ |
|
"epoch": 15.00253164556962, |
|
"grad_norm": 6.699902534484863, |
|
"learning_rate": 9.395218002812941e-06, |
|
"loss": 0.2383, |
|
"step": 1220 |
|
}, |
|
{ |
|
"epoch": 15.00379746835443, |
|
"grad_norm": 15.322399139404297, |
|
"learning_rate": 9.381153305203939e-06, |
|
"loss": 0.2347, |
|
"step": 1230 |
|
}, |
|
{ |
|
"epoch": 15.00506329113924, |
|
"grad_norm": 4.224050998687744, |
|
"learning_rate": 9.367088607594937e-06, |
|
"loss": 0.1293, |
|
"step": 1240 |
|
}, |
|
{ |
|
"epoch": 15.00632911392405, |
|
"grad_norm": 3.2699191570281982, |
|
"learning_rate": 9.353023909985936e-06, |
|
"loss": 0.203, |
|
"step": 1250 |
|
}, |
|
{ |
|
"epoch": 15.00759493670886, |
|
"grad_norm": 69.02498626708984, |
|
"learning_rate": 9.338959212376934e-06, |
|
"loss": 0.3505, |
|
"step": 1260 |
|
}, |
|
{ |
|
"epoch": 15.00886075949367, |
|
"grad_norm": 148.28306579589844, |
|
"learning_rate": 9.324894514767934e-06, |
|
"loss": 0.3983, |
|
"step": 1270 |
|
}, |
|
{ |
|
"epoch": 15.010126582278481, |
|
"grad_norm": 26.6025447845459, |
|
"learning_rate": 9.310829817158932e-06, |
|
"loss": 0.3693, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 15.010126582278481, |
|
"eval_accuracy": 0.3191489361702128, |
|
"eval_loss": 2.09875750541687, |
|
"eval_runtime": 9.0724, |
|
"eval_samples_per_second": 5.181, |
|
"eval_steps_per_second": 1.323, |
|
"step": 1280 |
|
}, |
|
{ |
|
"epoch": 16.00126582278481, |
|
"grad_norm": 34.19914627075195, |
|
"learning_rate": 9.29676511954993e-06, |
|
"loss": 0.3708, |
|
"step": 1290 |
|
}, |
|
{ |
|
"epoch": 16.00253164556962, |
|
"grad_norm": 57.25946807861328, |
|
"learning_rate": 9.28270042194093e-06, |
|
"loss": 0.5632, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 16.00379746835443, |
|
"grad_norm": 1.7772458791732788, |
|
"learning_rate": 9.268635724331928e-06, |
|
"loss": 0.2617, |
|
"step": 1310 |
|
}, |
|
{ |
|
"epoch": 16.00506329113924, |
|
"grad_norm": 56.837650299072266, |
|
"learning_rate": 9.254571026722926e-06, |
|
"loss": 0.3024, |
|
"step": 1320 |
|
}, |
|
{ |
|
"epoch": 16.00632911392405, |
|
"grad_norm": 0.5459592342376709, |
|
"learning_rate": 9.240506329113925e-06, |
|
"loss": 0.2552, |
|
"step": 1330 |
|
}, |
|
{ |
|
"epoch": 16.00759493670886, |
|
"grad_norm": 153.30613708496094, |
|
"learning_rate": 9.226441631504923e-06, |
|
"loss": 0.659, |
|
"step": 1340 |
|
}, |
|
{ |
|
"epoch": 16.008860759493672, |
|
"grad_norm": 47.839324951171875, |
|
"learning_rate": 9.212376933895923e-06, |
|
"loss": 0.3776, |
|
"step": 1350 |
|
}, |
|
{ |
|
"epoch": 16.01012658227848, |
|
"grad_norm": 57.103763580322266, |
|
"learning_rate": 9.198312236286921e-06, |
|
"loss": 0.5933, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 16.01012658227848, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 1.8412983417510986, |
|
"eval_runtime": 8.4821, |
|
"eval_samples_per_second": 5.541, |
|
"eval_steps_per_second": 1.415, |
|
"step": 1360 |
|
}, |
|
{ |
|
"epoch": 17.00126582278481, |
|
"grad_norm": 0.2360084503889084, |
|
"learning_rate": 9.184247538677919e-06, |
|
"loss": 0.0993, |
|
"step": 1370 |
|
}, |
|
{ |
|
"epoch": 17.00253164556962, |
|
"grad_norm": 1.5083540678024292, |
|
"learning_rate": 9.170182841068917e-06, |
|
"loss": 0.2528, |
|
"step": 1380 |
|
}, |
|
{ |
|
"epoch": 17.00379746835443, |
|
"grad_norm": 7.469198226928711, |
|
"learning_rate": 9.156118143459917e-06, |
|
"loss": 0.3329, |
|
"step": 1390 |
|
}, |
|
{ |
|
"epoch": 17.00506329113924, |
|
"grad_norm": 100.13819885253906, |
|
"learning_rate": 9.142053445850915e-06, |
|
"loss": 0.2834, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 17.00632911392405, |
|
"grad_norm": 110.03264617919922, |
|
"learning_rate": 9.127988748241914e-06, |
|
"loss": 0.6402, |
|
"step": 1410 |
|
}, |
|
{ |
|
"epoch": 17.00759493670886, |
|
"grad_norm": 116.64907836914062, |
|
"learning_rate": 9.113924050632912e-06, |
|
"loss": 0.4343, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 17.008860759493672, |
|
"grad_norm": 13.220937728881836, |
|
"learning_rate": 9.09985935302391e-06, |
|
"loss": 0.3556, |
|
"step": 1430 |
|
}, |
|
{ |
|
"epoch": 17.01012658227848, |
|
"grad_norm": 63.16554260253906, |
|
"learning_rate": 9.085794655414908e-06, |
|
"loss": 0.202, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 17.01012658227848, |
|
"eval_accuracy": 0.3191489361702128, |
|
"eval_loss": 2.753727436065674, |
|
"eval_runtime": 8.441, |
|
"eval_samples_per_second": 5.568, |
|
"eval_steps_per_second": 1.422, |
|
"step": 1440 |
|
}, |
|
{ |
|
"epoch": 18.00126582278481, |
|
"grad_norm": 6.848087310791016, |
|
"learning_rate": 9.071729957805908e-06, |
|
"loss": 0.2198, |
|
"step": 1450 |
|
}, |
|
{ |
|
"epoch": 18.00253164556962, |
|
"grad_norm": 24.780672073364258, |
|
"learning_rate": 9.057665260196906e-06, |
|
"loss": 0.2974, |
|
"step": 1460 |
|
}, |
|
{ |
|
"epoch": 18.00379746835443, |
|
"grad_norm": 28.783912658691406, |
|
"learning_rate": 9.043600562587905e-06, |
|
"loss": 0.2387, |
|
"step": 1470 |
|
}, |
|
{ |
|
"epoch": 18.00506329113924, |
|
"grad_norm": 0.49766799807548523, |
|
"learning_rate": 9.029535864978903e-06, |
|
"loss": 0.2592, |
|
"step": 1480 |
|
}, |
|
{ |
|
"epoch": 18.00632911392405, |
|
"grad_norm": 107.1086196899414, |
|
"learning_rate": 9.015471167369903e-06, |
|
"loss": 0.2736, |
|
"step": 1490 |
|
}, |
|
{ |
|
"epoch": 18.00759493670886, |
|
"grad_norm": 1.34207284450531, |
|
"learning_rate": 9.001406469760901e-06, |
|
"loss": 0.3996, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 18.008860759493672, |
|
"grad_norm": 0.8816600441932678, |
|
"learning_rate": 8.987341772151899e-06, |
|
"loss": 0.3255, |
|
"step": 1510 |
|
}, |
|
{ |
|
"epoch": 18.01012658227848, |
|
"grad_norm": 0.2845398187637329, |
|
"learning_rate": 8.973277074542897e-06, |
|
"loss": 0.1454, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 18.01012658227848, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 2.461174964904785, |
|
"eval_runtime": 8.4461, |
|
"eval_samples_per_second": 5.565, |
|
"eval_steps_per_second": 1.421, |
|
"step": 1520 |
|
}, |
|
{ |
|
"epoch": 19.00126582278481, |
|
"grad_norm": 7.336277961730957, |
|
"learning_rate": 8.959212376933897e-06, |
|
"loss": 0.1073, |
|
"step": 1530 |
|
}, |
|
{ |
|
"epoch": 19.00253164556962, |
|
"grad_norm": 1.7120882272720337, |
|
"learning_rate": 8.945147679324895e-06, |
|
"loss": 0.286, |
|
"step": 1540 |
|
}, |
|
{ |
|
"epoch": 19.00379746835443, |
|
"grad_norm": 5.534066677093506, |
|
"learning_rate": 8.931082981715894e-06, |
|
"loss": 0.0711, |
|
"step": 1550 |
|
}, |
|
{ |
|
"epoch": 19.00506329113924, |
|
"grad_norm": 0.4742295742034912, |
|
"learning_rate": 8.917018284106892e-06, |
|
"loss": 0.0344, |
|
"step": 1560 |
|
}, |
|
{ |
|
"epoch": 19.00632911392405, |
|
"grad_norm": 7.864910125732422, |
|
"learning_rate": 8.90295358649789e-06, |
|
"loss": 0.4488, |
|
"step": 1570 |
|
}, |
|
{ |
|
"epoch": 19.00759493670886, |
|
"grad_norm": 29.55208396911621, |
|
"learning_rate": 8.888888888888888e-06, |
|
"loss": 0.0769, |
|
"step": 1580 |
|
}, |
|
{ |
|
"epoch": 19.008860759493672, |
|
"grad_norm": 6.9868597984313965, |
|
"learning_rate": 8.874824191279888e-06, |
|
"loss": 0.2275, |
|
"step": 1590 |
|
}, |
|
{ |
|
"epoch": 19.01012658227848, |
|
"grad_norm": 0.9346122741699219, |
|
"learning_rate": 8.860759493670886e-06, |
|
"loss": 0.1332, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 19.01012658227848, |
|
"eval_accuracy": 0.3404255319148936, |
|
"eval_loss": 3.094426155090332, |
|
"eval_runtime": 8.4844, |
|
"eval_samples_per_second": 5.54, |
|
"eval_steps_per_second": 1.414, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 20.00126582278481, |
|
"grad_norm": 0.2495788335800171, |
|
"learning_rate": 8.846694796061886e-06, |
|
"loss": 0.0054, |
|
"step": 1610 |
|
}, |
|
{ |
|
"epoch": 20.00253164556962, |
|
"grad_norm": 0.9110737442970276, |
|
"learning_rate": 8.832630098452884e-06, |
|
"loss": 0.1922, |
|
"step": 1620 |
|
}, |
|
{ |
|
"epoch": 20.00379746835443, |
|
"grad_norm": 18.964305877685547, |
|
"learning_rate": 8.818565400843883e-06, |
|
"loss": 0.0081, |
|
"step": 1630 |
|
}, |
|
{ |
|
"epoch": 20.00506329113924, |
|
"grad_norm": 32.764984130859375, |
|
"learning_rate": 8.804500703234881e-06, |
|
"loss": 0.1649, |
|
"step": 1640 |
|
}, |
|
{ |
|
"epoch": 20.00632911392405, |
|
"grad_norm": 0.6211456060409546, |
|
"learning_rate": 8.79043600562588e-06, |
|
"loss": 0.267, |
|
"step": 1650 |
|
}, |
|
{ |
|
"epoch": 20.00759493670886, |
|
"grad_norm": 11.705927848815918, |
|
"learning_rate": 8.776371308016879e-06, |
|
"loss": 0.0885, |
|
"step": 1660 |
|
}, |
|
{ |
|
"epoch": 20.008860759493672, |
|
"grad_norm": 4.711695194244385, |
|
"learning_rate": 8.762306610407877e-06, |
|
"loss": 0.1366, |
|
"step": 1670 |
|
}, |
|
{ |
|
"epoch": 20.01012658227848, |
|
"grad_norm": 1.115964412689209, |
|
"learning_rate": 8.748241912798877e-06, |
|
"loss": 0.9193, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 20.01012658227848, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 2.869112491607666, |
|
"eval_runtime": 8.4747, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 1.416, |
|
"step": 1680 |
|
}, |
|
{ |
|
"epoch": 21.00126582278481, |
|
"grad_norm": 0.7912726998329163, |
|
"learning_rate": 8.734177215189874e-06, |
|
"loss": 0.0675, |
|
"step": 1690 |
|
}, |
|
{ |
|
"epoch": 21.00253164556962, |
|
"grad_norm": 1.868703007698059, |
|
"learning_rate": 8.720112517580872e-06, |
|
"loss": 0.0215, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 21.00379746835443, |
|
"grad_norm": 0.07253948599100113, |
|
"learning_rate": 8.70604781997187e-06, |
|
"loss": 0.1879, |
|
"step": 1710 |
|
}, |
|
{ |
|
"epoch": 21.00506329113924, |
|
"grad_norm": 0.07606098800897598, |
|
"learning_rate": 8.69198312236287e-06, |
|
"loss": 0.2937, |
|
"step": 1720 |
|
}, |
|
{ |
|
"epoch": 21.00632911392405, |
|
"grad_norm": 4.814393520355225, |
|
"learning_rate": 8.677918424753868e-06, |
|
"loss": 0.1223, |
|
"step": 1730 |
|
}, |
|
{ |
|
"epoch": 21.00759493670886, |
|
"grad_norm": 0.052608225494623184, |
|
"learning_rate": 8.663853727144868e-06, |
|
"loss": 0.1895, |
|
"step": 1740 |
|
}, |
|
{ |
|
"epoch": 21.008860759493672, |
|
"grad_norm": 6.358555316925049, |
|
"learning_rate": 8.649789029535866e-06, |
|
"loss": 0.2224, |
|
"step": 1750 |
|
}, |
|
{ |
|
"epoch": 21.01012658227848, |
|
"grad_norm": 0.10539772361516953, |
|
"learning_rate": 8.635724331926865e-06, |
|
"loss": 0.1201, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 21.01012658227848, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 3.0563912391662598, |
|
"eval_runtime": 8.5749, |
|
"eval_samples_per_second": 5.481, |
|
"eval_steps_per_second": 1.399, |
|
"step": 1760 |
|
}, |
|
{ |
|
"epoch": 22.00126582278481, |
|
"grad_norm": 3.320700168609619, |
|
"learning_rate": 8.621659634317863e-06, |
|
"loss": 0.0066, |
|
"step": 1770 |
|
}, |
|
{ |
|
"epoch": 22.00253164556962, |
|
"grad_norm": 77.03856658935547, |
|
"learning_rate": 8.607594936708861e-06, |
|
"loss": 0.0985, |
|
"step": 1780 |
|
}, |
|
{ |
|
"epoch": 22.00379746835443, |
|
"grad_norm": 0.06309456378221512, |
|
"learning_rate": 8.59353023909986e-06, |
|
"loss": 0.0229, |
|
"step": 1790 |
|
}, |
|
{ |
|
"epoch": 22.00506329113924, |
|
"grad_norm": 81.78655242919922, |
|
"learning_rate": 8.579465541490859e-06, |
|
"loss": 0.1983, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 22.00632911392405, |
|
"grad_norm": 0.15561726689338684, |
|
"learning_rate": 8.565400843881857e-06, |
|
"loss": 0.1817, |
|
"step": 1810 |
|
}, |
|
{ |
|
"epoch": 22.00759493670886, |
|
"grad_norm": 10.21172046661377, |
|
"learning_rate": 8.551336146272857e-06, |
|
"loss": 0.0148, |
|
"step": 1820 |
|
}, |
|
{ |
|
"epoch": 22.008860759493672, |
|
"grad_norm": 0.5883349180221558, |
|
"learning_rate": 8.537271448663855e-06, |
|
"loss": 0.0018, |
|
"step": 1830 |
|
}, |
|
{ |
|
"epoch": 22.01012658227848, |
|
"grad_norm": 0.019595500081777573, |
|
"learning_rate": 8.523206751054853e-06, |
|
"loss": 0.1716, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 22.01012658227848, |
|
"eval_accuracy": 0.3404255319148936, |
|
"eval_loss": 3.390719175338745, |
|
"eval_runtime": 8.6187, |
|
"eval_samples_per_second": 5.453, |
|
"eval_steps_per_second": 1.392, |
|
"step": 1840 |
|
}, |
|
{ |
|
"epoch": 23.00126582278481, |
|
"grad_norm": 0.025295179337263107, |
|
"learning_rate": 8.50914205344585e-06, |
|
"loss": 0.0037, |
|
"step": 1850 |
|
}, |
|
{ |
|
"epoch": 23.00253164556962, |
|
"grad_norm": 0.1332973688840866, |
|
"learning_rate": 8.49507735583685e-06, |
|
"loss": 0.0026, |
|
"step": 1860 |
|
}, |
|
{ |
|
"epoch": 23.00379746835443, |
|
"grad_norm": 0.08286605030298233, |
|
"learning_rate": 8.481012658227848e-06, |
|
"loss": 0.1337, |
|
"step": 1870 |
|
}, |
|
{ |
|
"epoch": 23.00506329113924, |
|
"grad_norm": 0.11277411133050919, |
|
"learning_rate": 8.466947960618848e-06, |
|
"loss": 0.1286, |
|
"step": 1880 |
|
}, |
|
{ |
|
"epoch": 23.00632911392405, |
|
"grad_norm": 0.047154348343610764, |
|
"learning_rate": 8.452883263009846e-06, |
|
"loss": 0.0068, |
|
"step": 1890 |
|
}, |
|
{ |
|
"epoch": 23.00759493670886, |
|
"grad_norm": 0.02648848481476307, |
|
"learning_rate": 8.438818565400846e-06, |
|
"loss": 0.0168, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 23.008860759493672, |
|
"grad_norm": 0.0498431995511055, |
|
"learning_rate": 8.424753867791844e-06, |
|
"loss": 0.0048, |
|
"step": 1910 |
|
}, |
|
{ |
|
"epoch": 23.01012658227848, |
|
"grad_norm": 0.11999885737895966, |
|
"learning_rate": 8.410689170182841e-06, |
|
"loss": 0.0402, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 23.01012658227848, |
|
"eval_accuracy": 0.3191489361702128, |
|
"eval_loss": 3.7917425632476807, |
|
"eval_runtime": 8.7081, |
|
"eval_samples_per_second": 5.397, |
|
"eval_steps_per_second": 1.378, |
|
"step": 1920 |
|
}, |
|
{ |
|
"epoch": 24.00126582278481, |
|
"grad_norm": 0.034499507397413254, |
|
"learning_rate": 8.39662447257384e-06, |
|
"loss": 0.012, |
|
"step": 1930 |
|
}, |
|
{ |
|
"epoch": 24.00253164556962, |
|
"grad_norm": 42.179473876953125, |
|
"learning_rate": 8.382559774964839e-06, |
|
"loss": 0.2346, |
|
"step": 1940 |
|
}, |
|
{ |
|
"epoch": 24.00379746835443, |
|
"grad_norm": 0.6478450298309326, |
|
"learning_rate": 8.368495077355837e-06, |
|
"loss": 0.008, |
|
"step": 1950 |
|
}, |
|
{ |
|
"epoch": 24.00506329113924, |
|
"grad_norm": 0.04269712418317795, |
|
"learning_rate": 8.354430379746837e-06, |
|
"loss": 0.004, |
|
"step": 1960 |
|
}, |
|
{ |
|
"epoch": 24.00632911392405, |
|
"grad_norm": 29.495561599731445, |
|
"learning_rate": 8.340365682137835e-06, |
|
"loss": 0.1656, |
|
"step": 1970 |
|
}, |
|
{ |
|
"epoch": 24.00759493670886, |
|
"grad_norm": 0.09528925269842148, |
|
"learning_rate": 8.326300984528833e-06, |
|
"loss": 0.0268, |
|
"step": 1980 |
|
}, |
|
{ |
|
"epoch": 24.008860759493672, |
|
"grad_norm": 0.023056741803884506, |
|
"learning_rate": 8.31223628691983e-06, |
|
"loss": 0.0095, |
|
"step": 1990 |
|
}, |
|
{ |
|
"epoch": 24.01012658227848, |
|
"grad_norm": 2.2930028438568115, |
|
"learning_rate": 8.29817158931083e-06, |
|
"loss": 0.0709, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 24.01012658227848, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 3.5486884117126465, |
|
"eval_runtime": 8.9153, |
|
"eval_samples_per_second": 5.272, |
|
"eval_steps_per_second": 1.346, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 25.00126582278481, |
|
"grad_norm": 0.614183783531189, |
|
"learning_rate": 8.284106891701828e-06, |
|
"loss": 0.0297, |
|
"step": 2010 |
|
}, |
|
{ |
|
"epoch": 25.00253164556962, |
|
"grad_norm": 243.75750732421875, |
|
"learning_rate": 8.270042194092828e-06, |
|
"loss": 0.1049, |
|
"step": 2020 |
|
}, |
|
{ |
|
"epoch": 25.00379746835443, |
|
"grad_norm": 210.3068389892578, |
|
"learning_rate": 8.255977496483826e-06, |
|
"loss": 0.0886, |
|
"step": 2030 |
|
}, |
|
{ |
|
"epoch": 25.00506329113924, |
|
"grad_norm": 2.261234760284424, |
|
"learning_rate": 8.241912798874826e-06, |
|
"loss": 0.0027, |
|
"step": 2040 |
|
}, |
|
{ |
|
"epoch": 25.00632911392405, |
|
"grad_norm": 6.479150772094727, |
|
"learning_rate": 8.227848101265824e-06, |
|
"loss": 0.1408, |
|
"step": 2050 |
|
}, |
|
{ |
|
"epoch": 25.00759493670886, |
|
"grad_norm": 0.04374171420931816, |
|
"learning_rate": 8.213783403656822e-06, |
|
"loss": 0.0659, |
|
"step": 2060 |
|
}, |
|
{ |
|
"epoch": 25.008860759493672, |
|
"grad_norm": 0.19435258209705353, |
|
"learning_rate": 8.199718706047821e-06, |
|
"loss": 0.0016, |
|
"step": 2070 |
|
}, |
|
{ |
|
"epoch": 25.01012658227848, |
|
"grad_norm": 0.020269129425287247, |
|
"learning_rate": 8.18565400843882e-06, |
|
"loss": 0.1021, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 25.01012658227848, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 3.9004390239715576, |
|
"eval_runtime": 8.7333, |
|
"eval_samples_per_second": 5.382, |
|
"eval_steps_per_second": 1.374, |
|
"step": 2080 |
|
}, |
|
{ |
|
"epoch": 26.00126582278481, |
|
"grad_norm": 0.07372234761714935, |
|
"learning_rate": 8.171589310829819e-06, |
|
"loss": 0.0096, |
|
"step": 2090 |
|
}, |
|
{ |
|
"epoch": 26.00253164556962, |
|
"grad_norm": 0.9319536089897156, |
|
"learning_rate": 8.157524613220817e-06, |
|
"loss": 0.0877, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 26.00379746835443, |
|
"grad_norm": 1.9737757444381714, |
|
"learning_rate": 8.143459915611815e-06, |
|
"loss": 0.0105, |
|
"step": 2110 |
|
}, |
|
{ |
|
"epoch": 26.00506329113924, |
|
"grad_norm": 0.010359777137637138, |
|
"learning_rate": 8.129395218002813e-06, |
|
"loss": 0.0019, |
|
"step": 2120 |
|
}, |
|
{ |
|
"epoch": 26.00632911392405, |
|
"grad_norm": 0.16365653276443481, |
|
"learning_rate": 8.115330520393813e-06, |
|
"loss": 0.0006, |
|
"step": 2130 |
|
}, |
|
{ |
|
"epoch": 26.00759493670886, |
|
"grad_norm": 184.18040466308594, |
|
"learning_rate": 8.10126582278481e-06, |
|
"loss": 0.1279, |
|
"step": 2140 |
|
}, |
|
{ |
|
"epoch": 26.008860759493672, |
|
"grad_norm": 0.01543757226318121, |
|
"learning_rate": 8.08720112517581e-06, |
|
"loss": 0.0289, |
|
"step": 2150 |
|
}, |
|
{ |
|
"epoch": 26.01012658227848, |
|
"grad_norm": 0.02343440055847168, |
|
"learning_rate": 8.073136427566808e-06, |
|
"loss": 0.0029, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 26.01012658227848, |
|
"eval_accuracy": 0.3617021276595745, |
|
"eval_loss": 4.194858551025391, |
|
"eval_runtime": 9.0554, |
|
"eval_samples_per_second": 5.19, |
|
"eval_steps_per_second": 1.325, |
|
"step": 2160 |
|
}, |
|
{ |
|
"epoch": 27.00126582278481, |
|
"grad_norm": 0.163554847240448, |
|
"learning_rate": 8.059071729957806e-06, |
|
"loss": 0.0027, |
|
"step": 2170 |
|
}, |
|
{ |
|
"epoch": 27.00253164556962, |
|
"grad_norm": 64.04247283935547, |
|
"learning_rate": 8.045007032348806e-06, |
|
"loss": 0.0081, |
|
"step": 2180 |
|
}, |
|
{ |
|
"epoch": 27.00379746835443, |
|
"grad_norm": 0.2571711242198944, |
|
"learning_rate": 8.030942334739804e-06, |
|
"loss": 0.0059, |
|
"step": 2190 |
|
}, |
|
{ |
|
"epoch": 27.00506329113924, |
|
"grad_norm": 0.015557597391307354, |
|
"learning_rate": 8.016877637130802e-06, |
|
"loss": 0.0709, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 27.00632911392405, |
|
"grad_norm": 0.05058155208826065, |
|
"learning_rate": 8.002812939521801e-06, |
|
"loss": 0.0016, |
|
"step": 2210 |
|
}, |
|
{ |
|
"epoch": 27.00759493670886, |
|
"grad_norm": 0.06934946775436401, |
|
"learning_rate": 7.9887482419128e-06, |
|
"loss": 0.0048, |
|
"step": 2220 |
|
}, |
|
{ |
|
"epoch": 27.008860759493672, |
|
"grad_norm": 0.06157020479440689, |
|
"learning_rate": 7.974683544303799e-06, |
|
"loss": 0.0006, |
|
"step": 2230 |
|
}, |
|
{ |
|
"epoch": 27.01012658227848, |
|
"grad_norm": 0.016570130363106728, |
|
"learning_rate": 7.960618846694797e-06, |
|
"loss": 0.1352, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 27.01012658227848, |
|
"eval_accuracy": 0.3617021276595745, |
|
"eval_loss": 4.503756999969482, |
|
"eval_runtime": 8.4591, |
|
"eval_samples_per_second": 5.556, |
|
"eval_steps_per_second": 1.419, |
|
"step": 2240 |
|
}, |
|
{ |
|
"epoch": 28.00126582278481, |
|
"grad_norm": 0.05582532659173012, |
|
"learning_rate": 7.946554149085795e-06, |
|
"loss": 0.0875, |
|
"step": 2250 |
|
}, |
|
{ |
|
"epoch": 28.00253164556962, |
|
"grad_norm": 0.04096909984946251, |
|
"learning_rate": 7.932489451476793e-06, |
|
"loss": 0.0003, |
|
"step": 2260 |
|
}, |
|
{ |
|
"epoch": 28.00379746835443, |
|
"grad_norm": 0.9817273616790771, |
|
"learning_rate": 7.918424753867793e-06, |
|
"loss": 0.0012, |
|
"step": 2270 |
|
}, |
|
{ |
|
"epoch": 28.00506329113924, |
|
"grad_norm": 0.07687732577323914, |
|
"learning_rate": 7.90436005625879e-06, |
|
"loss": 0.0023, |
|
"step": 2280 |
|
}, |
|
{ |
|
"epoch": 28.00632911392405, |
|
"grad_norm": 18.15758514404297, |
|
"learning_rate": 7.89029535864979e-06, |
|
"loss": 0.1754, |
|
"step": 2290 |
|
}, |
|
{ |
|
"epoch": 28.00759493670886, |
|
"grad_norm": 0.007940283045172691, |
|
"learning_rate": 7.876230661040788e-06, |
|
"loss": 0.3378, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 28.008860759493672, |
|
"grad_norm": 165.2981414794922, |
|
"learning_rate": 7.862165963431786e-06, |
|
"loss": 0.2031, |
|
"step": 2310 |
|
}, |
|
{ |
|
"epoch": 28.01012658227848, |
|
"grad_norm": 0.009227721951901913, |
|
"learning_rate": 7.848101265822786e-06, |
|
"loss": 0.0173, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 28.01012658227848, |
|
"eval_accuracy": 0.3829787234042553, |
|
"eval_loss": 3.935215473175049, |
|
"eval_runtime": 8.4766, |
|
"eval_samples_per_second": 5.545, |
|
"eval_steps_per_second": 1.416, |
|
"step": 2320 |
|
}, |
|
{ |
|
"epoch": 29.00126582278481, |
|
"grad_norm": 0.01626473106443882, |
|
"learning_rate": 7.834036568213784e-06, |
|
"loss": 0.0544, |
|
"step": 2330 |
|
}, |
|
{ |
|
"epoch": 29.00253164556962, |
|
"grad_norm": 0.018083002418279648, |
|
"learning_rate": 7.819971870604782e-06, |
|
"loss": 0.0064, |
|
"step": 2340 |
|
}, |
|
{ |
|
"epoch": 29.00379746835443, |
|
"grad_norm": 0.2154766470193863, |
|
"learning_rate": 7.805907172995782e-06, |
|
"loss": 0.0006, |
|
"step": 2350 |
|
}, |
|
{ |
|
"epoch": 29.00506329113924, |
|
"grad_norm": 220.96780395507812, |
|
"learning_rate": 7.79184247538678e-06, |
|
"loss": 0.1229, |
|
"step": 2360 |
|
}, |
|
{ |
|
"epoch": 29.00632911392405, |
|
"grad_norm": 0.17289696633815765, |
|
"learning_rate": 7.77777777777778e-06, |
|
"loss": 0.0005, |
|
"step": 2370 |
|
}, |
|
{ |
|
"epoch": 29.00759493670886, |
|
"grad_norm": 1.7889928817749023, |
|
"learning_rate": 7.763713080168777e-06, |
|
"loss": 0.1407, |
|
"step": 2380 |
|
}, |
|
{ |
|
"epoch": 29.008860759493672, |
|
"grad_norm": 0.008173462934792042, |
|
"learning_rate": 7.749648382559775e-06, |
|
"loss": 0.1463, |
|
"step": 2390 |
|
}, |
|
{ |
|
"epoch": 29.01012658227848, |
|
"grad_norm": 0.011393209919333458, |
|
"learning_rate": 7.735583684950773e-06, |
|
"loss": 0.0012, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 29.01012658227848, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 4.323361873626709, |
|
"eval_runtime": 8.4682, |
|
"eval_samples_per_second": 5.55, |
|
"eval_steps_per_second": 1.417, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 30.00126582278481, |
|
"grad_norm": 0.011178904213011265, |
|
"learning_rate": 7.721518987341773e-06, |
|
"loss": 0.0004, |
|
"step": 2410 |
|
}, |
|
{ |
|
"epoch": 30.00253164556962, |
|
"grad_norm": 0.0153023237362504, |
|
"learning_rate": 7.70745428973277e-06, |
|
"loss": 0.0008, |
|
"step": 2420 |
|
}, |
|
{ |
|
"epoch": 30.00379746835443, |
|
"grad_norm": 0.010914456099271774, |
|
"learning_rate": 7.69338959212377e-06, |
|
"loss": 0.0004, |
|
"step": 2430 |
|
}, |
|
{ |
|
"epoch": 30.00506329113924, |
|
"grad_norm": 0.007891859859228134, |
|
"learning_rate": 7.679324894514768e-06, |
|
"loss": 0.0005, |
|
"step": 2440 |
|
}, |
|
{ |
|
"epoch": 30.00632911392405, |
|
"grad_norm": 87.5243911743164, |
|
"learning_rate": 7.665260196905766e-06, |
|
"loss": 0.0081, |
|
"step": 2450 |
|
}, |
|
{ |
|
"epoch": 30.00759493670886, |
|
"grad_norm": 1.0978916883468628, |
|
"learning_rate": 7.651195499296766e-06, |
|
"loss": 0.0004, |
|
"step": 2460 |
|
}, |
|
{ |
|
"epoch": 30.008860759493672, |
|
"grad_norm": 12.773395538330078, |
|
"learning_rate": 7.637130801687764e-06, |
|
"loss": 0.0024, |
|
"step": 2470 |
|
}, |
|
{ |
|
"epoch": 30.01012658227848, |
|
"grad_norm": 0.03179134428501129, |
|
"learning_rate": 7.623066104078764e-06, |
|
"loss": 0.0007, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 30.01012658227848, |
|
"eval_accuracy": 0.3829787234042553, |
|
"eval_loss": 4.287741661071777, |
|
"eval_runtime": 8.4651, |
|
"eval_samples_per_second": 5.552, |
|
"eval_steps_per_second": 1.418, |
|
"step": 2480 |
|
}, |
|
{ |
|
"epoch": 31.00126582278481, |
|
"grad_norm": 0.005546510685235262, |
|
"learning_rate": 7.609001406469762e-06, |
|
"loss": 0.0003, |
|
"step": 2490 |
|
}, |
|
{ |
|
"epoch": 31.00253164556962, |
|
"grad_norm": 0.013632872141897678, |
|
"learning_rate": 7.5949367088607605e-06, |
|
"loss": 0.1395, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 31.00379746835443, |
|
"grad_norm": 0.006918332539498806, |
|
"learning_rate": 7.5808720112517584e-06, |
|
"loss": 0.0027, |
|
"step": 2510 |
|
}, |
|
{ |
|
"epoch": 31.00506329113924, |
|
"grad_norm": 0.012666971422731876, |
|
"learning_rate": 7.566807313642758e-06, |
|
"loss": 0.0004, |
|
"step": 2520 |
|
}, |
|
{ |
|
"epoch": 31.00632911392405, |
|
"grad_norm": 0.005221995059400797, |
|
"learning_rate": 7.552742616033756e-06, |
|
"loss": 0.0009, |
|
"step": 2530 |
|
}, |
|
{ |
|
"epoch": 31.00759493670886, |
|
"grad_norm": 0.013362145982682705, |
|
"learning_rate": 7.538677918424755e-06, |
|
"loss": 0.0012, |
|
"step": 2540 |
|
}, |
|
{ |
|
"epoch": 31.008860759493672, |
|
"grad_norm": 0.035756830126047134, |
|
"learning_rate": 7.524613220815753e-06, |
|
"loss": 0.0004, |
|
"step": 2550 |
|
}, |
|
{ |
|
"epoch": 31.01012658227848, |
|
"grad_norm": 0.08822837471961975, |
|
"learning_rate": 7.510548523206752e-06, |
|
"loss": 0.2292, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 31.01012658227848, |
|
"eval_accuracy": 0.3191489361702128, |
|
"eval_loss": 4.729736804962158, |
|
"eval_runtime": 8.4818, |
|
"eval_samples_per_second": 5.541, |
|
"eval_steps_per_second": 1.415, |
|
"step": 2560 |
|
}, |
|
{ |
|
"epoch": 32.00126582278481, |
|
"grad_norm": 6.84944486618042, |
|
"learning_rate": 7.4964838255977505e-06, |
|
"loss": 0.001, |
|
"step": 2570 |
|
}, |
|
{ |
|
"epoch": 32.00253164556962, |
|
"grad_norm": 0.025634169578552246, |
|
"learning_rate": 7.482419127988749e-06, |
|
"loss": 0.0002, |
|
"step": 2580 |
|
}, |
|
{ |
|
"epoch": 32.00379746835443, |
|
"grad_norm": 0.026997152715921402, |
|
"learning_rate": 7.468354430379747e-06, |
|
"loss": 0.0089, |
|
"step": 2590 |
|
}, |
|
{ |
|
"epoch": 32.00506329113924, |
|
"grad_norm": 0.008302225731313229, |
|
"learning_rate": 7.454289732770746e-06, |
|
"loss": 0.0005, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 32.00632911392405, |
|
"grad_norm": 0.033620625734329224, |
|
"learning_rate": 7.440225035161744e-06, |
|
"loss": 0.0081, |
|
"step": 2610 |
|
}, |
|
{ |
|
"epoch": 32.00759493670886, |
|
"grad_norm": 0.022618619725108147, |
|
"learning_rate": 7.426160337552744e-06, |
|
"loss": 0.0548, |
|
"step": 2620 |
|
}, |
|
{ |
|
"epoch": 32.00886075949367, |
|
"grad_norm": 0.0314350426197052, |
|
"learning_rate": 7.412095639943742e-06, |
|
"loss": 0.0003, |
|
"step": 2630 |
|
}, |
|
{ |
|
"epoch": 32.01012658227848, |
|
"grad_norm": 0.007120281923562288, |
|
"learning_rate": 7.398030942334741e-06, |
|
"loss": 0.0004, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 32.01012658227848, |
|
"eval_accuracy": 0.3829787234042553, |
|
"eval_loss": 4.471046447753906, |
|
"eval_runtime": 8.9073, |
|
"eval_samples_per_second": 5.277, |
|
"eval_steps_per_second": 1.347, |
|
"step": 2640 |
|
}, |
|
{ |
|
"epoch": 33.00126582278481, |
|
"grad_norm": 0.3721332550048828, |
|
"learning_rate": 7.3839662447257386e-06, |
|
"loss": 0.1564, |
|
"step": 2650 |
|
}, |
|
{ |
|
"epoch": 33.00253164556962, |
|
"grad_norm": 0.052768442779779434, |
|
"learning_rate": 7.369901547116738e-06, |
|
"loss": 0.0022, |
|
"step": 2660 |
|
}, |
|
{ |
|
"epoch": 33.00379746835443, |
|
"grad_norm": 93.05609130859375, |
|
"learning_rate": 7.355836849507736e-06, |
|
"loss": 0.2399, |
|
"step": 2670 |
|
}, |
|
{ |
|
"epoch": 33.00506329113924, |
|
"grad_norm": 0.0038992296904325485, |
|
"learning_rate": 7.341772151898735e-06, |
|
"loss": 0.0088, |
|
"step": 2680 |
|
}, |
|
{ |
|
"epoch": 33.00632911392405, |
|
"grad_norm": 0.020863041281700134, |
|
"learning_rate": 7.327707454289733e-06, |
|
"loss": 0.0002, |
|
"step": 2690 |
|
}, |
|
{ |
|
"epoch": 33.00759493670886, |
|
"grad_norm": 0.006648873444646597, |
|
"learning_rate": 7.313642756680732e-06, |
|
"loss": 0.0388, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 33.00886075949367, |
|
"grad_norm": 156.20700073242188, |
|
"learning_rate": 7.29957805907173e-06, |
|
"loss": 0.2333, |
|
"step": 2710 |
|
}, |
|
{ |
|
"epoch": 33.01012658227848, |
|
"grad_norm": 0.008939997293055058, |
|
"learning_rate": 7.2855133614627295e-06, |
|
"loss": 0.0361, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 33.01012658227848, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.239119529724121, |
|
"eval_runtime": 8.9548, |
|
"eval_samples_per_second": 5.249, |
|
"eval_steps_per_second": 1.34, |
|
"step": 2720 |
|
}, |
|
{ |
|
"epoch": 34.00126582278481, |
|
"grad_norm": 15.270977020263672, |
|
"learning_rate": 7.2714486638537275e-06, |
|
"loss": 0.1088, |
|
"step": 2730 |
|
}, |
|
{ |
|
"epoch": 34.00253164556962, |
|
"grad_norm": 14.447574615478516, |
|
"learning_rate": 7.257383966244726e-06, |
|
"loss": 0.002, |
|
"step": 2740 |
|
}, |
|
{ |
|
"epoch": 34.00379746835443, |
|
"grad_norm": 0.009641851298511028, |
|
"learning_rate": 7.243319268635724e-06, |
|
"loss": 0.0003, |
|
"step": 2750 |
|
}, |
|
{ |
|
"epoch": 34.00506329113924, |
|
"grad_norm": 3.5248186588287354, |
|
"learning_rate": 7.229254571026724e-06, |
|
"loss": 0.0007, |
|
"step": 2760 |
|
}, |
|
{ |
|
"epoch": 34.00632911392405, |
|
"grad_norm": 0.06941874325275421, |
|
"learning_rate": 7.215189873417722e-06, |
|
"loss": 0.106, |
|
"step": 2770 |
|
}, |
|
{ |
|
"epoch": 34.00759493670886, |
|
"grad_norm": 0.0060513801872730255, |
|
"learning_rate": 7.201125175808721e-06, |
|
"loss": 0.0003, |
|
"step": 2780 |
|
}, |
|
{ |
|
"epoch": 34.00886075949367, |
|
"grad_norm": 0.03698160871863365, |
|
"learning_rate": 7.187060478199719e-06, |
|
"loss": 0.0003, |
|
"step": 2790 |
|
}, |
|
{ |
|
"epoch": 34.01012658227848, |
|
"grad_norm": 0.021343868225812912, |
|
"learning_rate": 7.172995780590718e-06, |
|
"loss": 0.0002, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 34.01012658227848, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 4.2255730628967285, |
|
"eval_runtime": 9.1143, |
|
"eval_samples_per_second": 5.157, |
|
"eval_steps_per_second": 1.317, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 35.00126582278481, |
|
"grad_norm": 0.02194453403353691, |
|
"learning_rate": 7.158931082981716e-06, |
|
"loss": 0.0002, |
|
"step": 2810 |
|
}, |
|
{ |
|
"epoch": 35.00253164556962, |
|
"grad_norm": 0.008681000210344791, |
|
"learning_rate": 7.144866385372715e-06, |
|
"loss": 0.0017, |
|
"step": 2820 |
|
}, |
|
{ |
|
"epoch": 35.00379746835443, |
|
"grad_norm": 0.003180101979523897, |
|
"learning_rate": 7.130801687763713e-06, |
|
"loss": 0.0002, |
|
"step": 2830 |
|
}, |
|
{ |
|
"epoch": 35.00506329113924, |
|
"grad_norm": 0.00399158988147974, |
|
"learning_rate": 7.116736990154712e-06, |
|
"loss": 0.0851, |
|
"step": 2840 |
|
}, |
|
{ |
|
"epoch": 35.00632911392405, |
|
"grad_norm": 317.16937255859375, |
|
"learning_rate": 7.10267229254571e-06, |
|
"loss": 0.1581, |
|
"step": 2850 |
|
}, |
|
{ |
|
"epoch": 35.00759493670886, |
|
"grad_norm": 0.006524229887872934, |
|
"learning_rate": 7.08860759493671e-06, |
|
"loss": 0.0002, |
|
"step": 2860 |
|
}, |
|
{ |
|
"epoch": 35.00886075949367, |
|
"grad_norm": 230.1353759765625, |
|
"learning_rate": 7.074542897327708e-06, |
|
"loss": 0.1746, |
|
"step": 2870 |
|
}, |
|
{ |
|
"epoch": 35.01012658227848, |
|
"grad_norm": 89.08749389648438, |
|
"learning_rate": 7.060478199718706e-06, |
|
"loss": 0.0082, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 35.01012658227848, |
|
"eval_accuracy": 0.3404255319148936, |
|
"eval_loss": 5.073359489440918, |
|
"eval_runtime": 8.501, |
|
"eval_samples_per_second": 5.529, |
|
"eval_steps_per_second": 1.412, |
|
"step": 2880 |
|
}, |
|
{ |
|
"epoch": 36.00126582278481, |
|
"grad_norm": 0.014753330498933792, |
|
"learning_rate": 7.046413502109706e-06, |
|
"loss": 0.0002, |
|
"step": 2890 |
|
}, |
|
{ |
|
"epoch": 36.00253164556962, |
|
"grad_norm": 3.0008251667022705, |
|
"learning_rate": 7.032348804500704e-06, |
|
"loss": 0.0373, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 36.00379746835443, |
|
"grad_norm": 0.010498768649995327, |
|
"learning_rate": 7.018284106891703e-06, |
|
"loss": 0.009, |
|
"step": 2910 |
|
}, |
|
{ |
|
"epoch": 36.00506329113924, |
|
"grad_norm": 0.06089121848344803, |
|
"learning_rate": 7.004219409282701e-06, |
|
"loss": 0.0003, |
|
"step": 2920 |
|
}, |
|
{ |
|
"epoch": 36.00632911392405, |
|
"grad_norm": 0.009548901580274105, |
|
"learning_rate": 6.9901547116737e-06, |
|
"loss": 0.1971, |
|
"step": 2930 |
|
}, |
|
{ |
|
"epoch": 36.00759493670886, |
|
"grad_norm": 0.11378785222768784, |
|
"learning_rate": 6.9760900140646985e-06, |
|
"loss": 0.0021, |
|
"step": 2940 |
|
}, |
|
{ |
|
"epoch": 36.00886075949367, |
|
"grad_norm": 0.004684086889028549, |
|
"learning_rate": 6.962025316455697e-06, |
|
"loss": 0.0879, |
|
"step": 2950 |
|
}, |
|
{ |
|
"epoch": 36.01012658227848, |
|
"grad_norm": 0.005387377459555864, |
|
"learning_rate": 6.947960618846695e-06, |
|
"loss": 0.0318, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 36.01012658227848, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.073455333709717, |
|
"eval_runtime": 8.491, |
|
"eval_samples_per_second": 5.535, |
|
"eval_steps_per_second": 1.413, |
|
"step": 2960 |
|
}, |
|
{ |
|
"epoch": 37.00126582278481, |
|
"grad_norm": 0.0073117660358548164, |
|
"learning_rate": 6.933895921237694e-06, |
|
"loss": 0.0002, |
|
"step": 2970 |
|
}, |
|
{ |
|
"epoch": 37.00253164556962, |
|
"grad_norm": 0.01575954630970955, |
|
"learning_rate": 6.919831223628692e-06, |
|
"loss": 0.0051, |
|
"step": 2980 |
|
}, |
|
{ |
|
"epoch": 37.00379746835443, |
|
"grad_norm": 0.005418274085968733, |
|
"learning_rate": 6.905766526019692e-06, |
|
"loss": 0.0003, |
|
"step": 2990 |
|
}, |
|
{ |
|
"epoch": 37.00506329113924, |
|
"grad_norm": 0.004269735421985388, |
|
"learning_rate": 6.89170182841069e-06, |
|
"loss": 0.0658, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 37.00632911392405, |
|
"grad_norm": 0.004627808462828398, |
|
"learning_rate": 6.8776371308016885e-06, |
|
"loss": 0.0001, |
|
"step": 3010 |
|
}, |
|
{ |
|
"epoch": 37.00759493670886, |
|
"grad_norm": 0.008293317630887032, |
|
"learning_rate": 6.8635724331926865e-06, |
|
"loss": 0.0904, |
|
"step": 3020 |
|
}, |
|
{ |
|
"epoch": 37.00886075949367, |
|
"grad_norm": 0.013359429314732552, |
|
"learning_rate": 6.849507735583686e-06, |
|
"loss": 0.1007, |
|
"step": 3030 |
|
}, |
|
{ |
|
"epoch": 37.01012658227848, |
|
"grad_norm": 0.006999185774475336, |
|
"learning_rate": 6.835443037974684e-06, |
|
"loss": 0.0002, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 37.01012658227848, |
|
"eval_accuracy": 0.2553191489361702, |
|
"eval_loss": 5.146430492401123, |
|
"eval_runtime": 8.4797, |
|
"eval_samples_per_second": 5.543, |
|
"eval_steps_per_second": 1.415, |
|
"step": 3040 |
|
}, |
|
{ |
|
"epoch": 38.00126582278481, |
|
"grad_norm": 0.005403840448707342, |
|
"learning_rate": 6.821378340365683e-06, |
|
"loss": 0.0003, |
|
"step": 3050 |
|
}, |
|
{ |
|
"epoch": 38.00253164556962, |
|
"grad_norm": 0.01304860319942236, |
|
"learning_rate": 6.807313642756681e-06, |
|
"loss": 0.0003, |
|
"step": 3060 |
|
}, |
|
{ |
|
"epoch": 38.00379746835443, |
|
"grad_norm": 0.29351142048835754, |
|
"learning_rate": 6.79324894514768e-06, |
|
"loss": 0.0004, |
|
"step": 3070 |
|
}, |
|
{ |
|
"epoch": 38.00506329113924, |
|
"grad_norm": 0.0071726636961102486, |
|
"learning_rate": 6.779184247538679e-06, |
|
"loss": 0.0002, |
|
"step": 3080 |
|
}, |
|
{ |
|
"epoch": 38.00632911392405, |
|
"grad_norm": 0.8798180222511292, |
|
"learning_rate": 6.7651195499296774e-06, |
|
"loss": 0.0578, |
|
"step": 3090 |
|
}, |
|
{ |
|
"epoch": 38.00759493670886, |
|
"grad_norm": 0.01378143671900034, |
|
"learning_rate": 6.751054852320675e-06, |
|
"loss": 0.0004, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 38.00886075949367, |
|
"grad_norm": 0.005133031401783228, |
|
"learning_rate": 6.736990154711674e-06, |
|
"loss": 0.0062, |
|
"step": 3110 |
|
}, |
|
{ |
|
"epoch": 38.01012658227848, |
|
"grad_norm": 0.01705407164990902, |
|
"learning_rate": 6.722925457102672e-06, |
|
"loss": 0.0003, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 38.01012658227848, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 4.634023189544678, |
|
"eval_runtime": 8.7121, |
|
"eval_samples_per_second": 5.395, |
|
"eval_steps_per_second": 1.377, |
|
"step": 3120 |
|
}, |
|
{ |
|
"epoch": 39.00126582278481, |
|
"grad_norm": 0.005898992531001568, |
|
"learning_rate": 6.708860759493672e-06, |
|
"loss": 0.0008, |
|
"step": 3130 |
|
}, |
|
{ |
|
"epoch": 39.00253164556962, |
|
"grad_norm": 0.40792742371559143, |
|
"learning_rate": 6.69479606188467e-06, |
|
"loss": 0.0003, |
|
"step": 3140 |
|
}, |
|
{ |
|
"epoch": 39.00379746835443, |
|
"grad_norm": 0.019352609291672707, |
|
"learning_rate": 6.680731364275669e-06, |
|
"loss": 0.0002, |
|
"step": 3150 |
|
}, |
|
{ |
|
"epoch": 39.00506329113924, |
|
"grad_norm": 0.0045697493478655815, |
|
"learning_rate": 6.666666666666667e-06, |
|
"loss": 0.0001, |
|
"step": 3160 |
|
}, |
|
{ |
|
"epoch": 39.00632911392405, |
|
"grad_norm": 0.005903047509491444, |
|
"learning_rate": 6.652601969057666e-06, |
|
"loss": 0.0001, |
|
"step": 3170 |
|
}, |
|
{ |
|
"epoch": 39.00759493670886, |
|
"grad_norm": 0.01714833825826645, |
|
"learning_rate": 6.638537271448664e-06, |
|
"loss": 0.1579, |
|
"step": 3180 |
|
}, |
|
{ |
|
"epoch": 39.00886075949367, |
|
"grad_norm": 0.07012953609228134, |
|
"learning_rate": 6.624472573839663e-06, |
|
"loss": 0.0002, |
|
"step": 3190 |
|
}, |
|
{ |
|
"epoch": 39.01012658227848, |
|
"grad_norm": 0.022253967821598053, |
|
"learning_rate": 6.610407876230661e-06, |
|
"loss": 0.48, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 39.01012658227848, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.337032794952393, |
|
"eval_runtime": 8.4914, |
|
"eval_samples_per_second": 5.535, |
|
"eval_steps_per_second": 1.413, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 40.00126582278481, |
|
"grad_norm": 233.1455535888672, |
|
"learning_rate": 6.59634317862166e-06, |
|
"loss": 0.0365, |
|
"step": 3210 |
|
}, |
|
{ |
|
"epoch": 40.00253164556962, |
|
"grad_norm": 0.008999134413897991, |
|
"learning_rate": 6.582278481012659e-06, |
|
"loss": 0.1475, |
|
"step": 3220 |
|
}, |
|
{ |
|
"epoch": 40.00379746835443, |
|
"grad_norm": 0.0032340127509087324, |
|
"learning_rate": 6.5682137834036576e-06, |
|
"loss": 0.1164, |
|
"step": 3230 |
|
}, |
|
{ |
|
"epoch": 40.00506329113924, |
|
"grad_norm": 0.014319919049739838, |
|
"learning_rate": 6.5541490857946555e-06, |
|
"loss": 0.0008, |
|
"step": 3240 |
|
}, |
|
{ |
|
"epoch": 40.00632911392405, |
|
"grad_norm": 0.019842559471726418, |
|
"learning_rate": 6.540084388185654e-06, |
|
"loss": 0.0006, |
|
"step": 3250 |
|
}, |
|
{ |
|
"epoch": 40.00759493670886, |
|
"grad_norm": 0.018094390630722046, |
|
"learning_rate": 6.526019690576652e-06, |
|
"loss": 0.0005, |
|
"step": 3260 |
|
}, |
|
{ |
|
"epoch": 40.00886075949367, |
|
"grad_norm": 0.00912653561681509, |
|
"learning_rate": 6.511954992967652e-06, |
|
"loss": 0.049, |
|
"step": 3270 |
|
}, |
|
{ |
|
"epoch": 40.01012658227848, |
|
"grad_norm": 0.03302593529224396, |
|
"learning_rate": 6.49789029535865e-06, |
|
"loss": 0.0002, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 40.01012658227848, |
|
"eval_accuracy": 0.3617021276595745, |
|
"eval_loss": 4.582009315490723, |
|
"eval_runtime": 8.4753, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 1.416, |
|
"step": 3280 |
|
}, |
|
{ |
|
"epoch": 41.00126582278481, |
|
"grad_norm": 10.000889778137207, |
|
"learning_rate": 6.483825597749649e-06, |
|
"loss": 0.0011, |
|
"step": 3290 |
|
}, |
|
{ |
|
"epoch": 41.00253164556962, |
|
"grad_norm": 0.009547159075737, |
|
"learning_rate": 6.4697609001406485e-06, |
|
"loss": 0.0002, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 41.00379746835443, |
|
"grad_norm": 0.005821730941534042, |
|
"learning_rate": 6.4556962025316464e-06, |
|
"loss": 0.0001, |
|
"step": 3310 |
|
}, |
|
{ |
|
"epoch": 41.00506329113924, |
|
"grad_norm": 0.00588320242241025, |
|
"learning_rate": 6.441631504922645e-06, |
|
"loss": 0.0025, |
|
"step": 3320 |
|
}, |
|
{ |
|
"epoch": 41.00632911392405, |
|
"grad_norm": 298.84820556640625, |
|
"learning_rate": 6.427566807313643e-06, |
|
"loss": 0.2948, |
|
"step": 3330 |
|
}, |
|
{ |
|
"epoch": 41.00759493670886, |
|
"grad_norm": 0.00702635245397687, |
|
"learning_rate": 6.413502109704642e-06, |
|
"loss": 0.0002, |
|
"step": 3340 |
|
}, |
|
{ |
|
"epoch": 41.00886075949367, |
|
"grad_norm": 0.003056429559364915, |
|
"learning_rate": 6.39943741209564e-06, |
|
"loss": 0.0259, |
|
"step": 3350 |
|
}, |
|
{ |
|
"epoch": 41.01012658227848, |
|
"grad_norm": 0.011072452180087566, |
|
"learning_rate": 6.38537271448664e-06, |
|
"loss": 0.0002, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 41.01012658227848, |
|
"eval_accuracy": 0.3191489361702128, |
|
"eval_loss": 5.0156683921813965, |
|
"eval_runtime": 8.468, |
|
"eval_samples_per_second": 5.55, |
|
"eval_steps_per_second": 1.417, |
|
"step": 3360 |
|
}, |
|
{ |
|
"epoch": 42.00126582278481, |
|
"grad_norm": 0.022217601537704468, |
|
"learning_rate": 6.371308016877638e-06, |
|
"loss": 0.0002, |
|
"step": 3370 |
|
}, |
|
{ |
|
"epoch": 42.00253164556962, |
|
"grad_norm": 65.69084167480469, |
|
"learning_rate": 6.3572433192686365e-06, |
|
"loss": 0.0037, |
|
"step": 3380 |
|
}, |
|
{ |
|
"epoch": 42.00379746835443, |
|
"grad_norm": 0.07589118182659149, |
|
"learning_rate": 6.3431786216596345e-06, |
|
"loss": 0.0002, |
|
"step": 3390 |
|
}, |
|
{ |
|
"epoch": 42.00506329113924, |
|
"grad_norm": 0.003494243137538433, |
|
"learning_rate": 6.329113924050634e-06, |
|
"loss": 0.0013, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 42.00632911392405, |
|
"grad_norm": 0.0027374387718737125, |
|
"learning_rate": 6.315049226441632e-06, |
|
"loss": 0.2015, |
|
"step": 3410 |
|
}, |
|
{ |
|
"epoch": 42.00759493670886, |
|
"grad_norm": 0.0055436789989471436, |
|
"learning_rate": 6.300984528832631e-06, |
|
"loss": 0.0001, |
|
"step": 3420 |
|
}, |
|
{ |
|
"epoch": 42.00886075949367, |
|
"grad_norm": 0.006678999401628971, |
|
"learning_rate": 6.286919831223629e-06, |
|
"loss": 0.0016, |
|
"step": 3430 |
|
}, |
|
{ |
|
"epoch": 42.01012658227848, |
|
"grad_norm": 0.006669959519058466, |
|
"learning_rate": 6.272855133614629e-06, |
|
"loss": 0.1209, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 42.01012658227848, |
|
"eval_accuracy": 0.3829787234042553, |
|
"eval_loss": 4.310915946960449, |
|
"eval_runtime": 8.4948, |
|
"eval_samples_per_second": 5.533, |
|
"eval_steps_per_second": 1.413, |
|
"step": 3440 |
|
}, |
|
{ |
|
"epoch": 43.00126582278481, |
|
"grad_norm": 36.991024017333984, |
|
"learning_rate": 6.2587904360056266e-06, |
|
"loss": 0.0031, |
|
"step": 3450 |
|
}, |
|
{ |
|
"epoch": 43.00253164556962, |
|
"grad_norm": 0.03218389302492142, |
|
"learning_rate": 6.244725738396625e-06, |
|
"loss": 0.0003, |
|
"step": 3460 |
|
}, |
|
{ |
|
"epoch": 43.00379746835443, |
|
"grad_norm": 0.0067522223107516766, |
|
"learning_rate": 6.230661040787623e-06, |
|
"loss": 0.0002, |
|
"step": 3470 |
|
}, |
|
{ |
|
"epoch": 43.00506329113924, |
|
"grad_norm": 0.09478826075792313, |
|
"learning_rate": 6.216596343178622e-06, |
|
"loss": 0.0001, |
|
"step": 3480 |
|
}, |
|
{ |
|
"epoch": 43.00632911392405, |
|
"grad_norm": 0.006108371540904045, |
|
"learning_rate": 6.20253164556962e-06, |
|
"loss": 0.0001, |
|
"step": 3490 |
|
}, |
|
{ |
|
"epoch": 43.00759493670886, |
|
"grad_norm": 0.004173735156655312, |
|
"learning_rate": 6.18846694796062e-06, |
|
"loss": 0.0002, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 43.00886075949367, |
|
"grad_norm": 0.004864380694925785, |
|
"learning_rate": 6.174402250351618e-06, |
|
"loss": 0.0001, |
|
"step": 3510 |
|
}, |
|
{ |
|
"epoch": 43.01012658227848, |
|
"grad_norm": 0.006738508120179176, |
|
"learning_rate": 6.160337552742617e-06, |
|
"loss": 0.0001, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 43.01012658227848, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 4.459574222564697, |
|
"eval_runtime": 8.4737, |
|
"eval_samples_per_second": 5.547, |
|
"eval_steps_per_second": 1.416, |
|
"step": 3520 |
|
}, |
|
{ |
|
"epoch": 44.00126582278481, |
|
"grad_norm": 0.00519252335652709, |
|
"learning_rate": 6.146272855133615e-06, |
|
"loss": 0.0001, |
|
"step": 3530 |
|
}, |
|
{ |
|
"epoch": 44.00253164556962, |
|
"grad_norm": 0.0036063846200704575, |
|
"learning_rate": 6.132208157524614e-06, |
|
"loss": 0.0001, |
|
"step": 3540 |
|
}, |
|
{ |
|
"epoch": 44.00379746835443, |
|
"grad_norm": 0.004207131918519735, |
|
"learning_rate": 6.118143459915612e-06, |
|
"loss": 0.0001, |
|
"step": 3550 |
|
}, |
|
{ |
|
"epoch": 44.00506329113924, |
|
"grad_norm": 0.0024055996909737587, |
|
"learning_rate": 6.104078762306611e-06, |
|
"loss": 0.0001, |
|
"step": 3560 |
|
}, |
|
{ |
|
"epoch": 44.00632911392405, |
|
"grad_norm": 0.004374026786535978, |
|
"learning_rate": 6.090014064697609e-06, |
|
"loss": 0.0001, |
|
"step": 3570 |
|
}, |
|
{ |
|
"epoch": 44.00759493670886, |
|
"grad_norm": 0.0027957686688750982, |
|
"learning_rate": 6.075949367088608e-06, |
|
"loss": 0.0002, |
|
"step": 3580 |
|
}, |
|
{ |
|
"epoch": 44.00886075949367, |
|
"grad_norm": 0.008639072068035603, |
|
"learning_rate": 6.061884669479607e-06, |
|
"loss": 0.004, |
|
"step": 3590 |
|
}, |
|
{ |
|
"epoch": 44.01012658227848, |
|
"grad_norm": 0.011701129376888275, |
|
"learning_rate": 6.0478199718706055e-06, |
|
"loss": 0.0109, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 44.01012658227848, |
|
"eval_accuracy": 0.3829787234042553, |
|
"eval_loss": 4.425137996673584, |
|
"eval_runtime": 8.4694, |
|
"eval_samples_per_second": 5.549, |
|
"eval_steps_per_second": 1.417, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 45.00126582278481, |
|
"grad_norm": 0.017412984743714333, |
|
"learning_rate": 6.0337552742616035e-06, |
|
"loss": 0.0001, |
|
"step": 3610 |
|
}, |
|
{ |
|
"epoch": 45.00253164556962, |
|
"grad_norm": 0.007230939343571663, |
|
"learning_rate": 6.019690576652602e-06, |
|
"loss": 0.0698, |
|
"step": 3620 |
|
}, |
|
{ |
|
"epoch": 45.00379746835443, |
|
"grad_norm": 0.014825068414211273, |
|
"learning_rate": 6.0056258790436e-06, |
|
"loss": 0.0001, |
|
"step": 3630 |
|
}, |
|
{ |
|
"epoch": 45.00506329113924, |
|
"grad_norm": 0.013121239840984344, |
|
"learning_rate": 5.9915611814346e-06, |
|
"loss": 0.0001, |
|
"step": 3640 |
|
}, |
|
{ |
|
"epoch": 45.00632911392405, |
|
"grad_norm": 0.013468984514474869, |
|
"learning_rate": 5.977496483825598e-06, |
|
"loss": 0.0001, |
|
"step": 3650 |
|
}, |
|
{ |
|
"epoch": 45.00759493670886, |
|
"grad_norm": 0.06317138671875, |
|
"learning_rate": 5.963431786216597e-06, |
|
"loss": 0.0001, |
|
"step": 3660 |
|
}, |
|
{ |
|
"epoch": 45.00886075949367, |
|
"grad_norm": 0.003630951512604952, |
|
"learning_rate": 5.949367088607595e-06, |
|
"loss": 0.1698, |
|
"step": 3670 |
|
}, |
|
{ |
|
"epoch": 45.01012658227848, |
|
"grad_norm": 0.005787010304629803, |
|
"learning_rate": 5.935302390998594e-06, |
|
"loss": 0.0001, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 45.01012658227848, |
|
"eval_accuracy": 0.2978723404255319, |
|
"eval_loss": 5.296198844909668, |
|
"eval_runtime": 8.4784, |
|
"eval_samples_per_second": 5.543, |
|
"eval_steps_per_second": 1.415, |
|
"step": 3680 |
|
}, |
|
{ |
|
"epoch": 46.00126582278481, |
|
"grad_norm": 0.00311860884539783, |
|
"learning_rate": 5.921237693389592e-06, |
|
"loss": 0.0553, |
|
"step": 3690 |
|
}, |
|
{ |
|
"epoch": 46.00253164556962, |
|
"grad_norm": 0.005304061342030764, |
|
"learning_rate": 5.907172995780591e-06, |
|
"loss": 0.0002, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 46.00379746835443, |
|
"grad_norm": 0.015418877825140953, |
|
"learning_rate": 5.893108298171589e-06, |
|
"loss": 0.0001, |
|
"step": 3710 |
|
}, |
|
{ |
|
"epoch": 46.00506329113924, |
|
"grad_norm": 0.018117068335413933, |
|
"learning_rate": 5.879043600562588e-06, |
|
"loss": 0.0024, |
|
"step": 3720 |
|
}, |
|
{ |
|
"epoch": 46.00632911392405, |
|
"grad_norm": 0.004327620379626751, |
|
"learning_rate": 5.864978902953588e-06, |
|
"loss": 0.0003, |
|
"step": 3730 |
|
}, |
|
{ |
|
"epoch": 46.00759493670886, |
|
"grad_norm": 0.024266647174954414, |
|
"learning_rate": 5.850914205344586e-06, |
|
"loss": 0.0001, |
|
"step": 3740 |
|
}, |
|
{ |
|
"epoch": 46.00886075949367, |
|
"grad_norm": 414.96563720703125, |
|
"learning_rate": 5.8368495077355845e-06, |
|
"loss": 0.0487, |
|
"step": 3750 |
|
}, |
|
{ |
|
"epoch": 46.01012658227848, |
|
"grad_norm": 0.008569066412746906, |
|
"learning_rate": 5.8227848101265824e-06, |
|
"loss": 0.1516, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 46.01012658227848, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 4.23142147064209, |
|
"eval_runtime": 8.4548, |
|
"eval_samples_per_second": 5.559, |
|
"eval_steps_per_second": 1.419, |
|
"step": 3760 |
|
}, |
|
{ |
|
"epoch": 47.00126582278481, |
|
"grad_norm": 5.414425849914551, |
|
"learning_rate": 5.808720112517582e-06, |
|
"loss": 0.0007, |
|
"step": 3770 |
|
}, |
|
{ |
|
"epoch": 47.00253164556962, |
|
"grad_norm": 0.0027215760201215744, |
|
"learning_rate": 5.79465541490858e-06, |
|
"loss": 0.0002, |
|
"step": 3780 |
|
}, |
|
{ |
|
"epoch": 47.00379746835443, |
|
"grad_norm": 0.15611502528190613, |
|
"learning_rate": 5.780590717299579e-06, |
|
"loss": 0.0073, |
|
"step": 3790 |
|
}, |
|
{ |
|
"epoch": 47.00506329113924, |
|
"grad_norm": 0.0019717360846698284, |
|
"learning_rate": 5.766526019690577e-06, |
|
"loss": 0.0003, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 47.00632911392405, |
|
"grad_norm": 0.005944707430899143, |
|
"learning_rate": 5.7524613220815765e-06, |
|
"loss": 0.0035, |
|
"step": 3810 |
|
}, |
|
{ |
|
"epoch": 47.00759493670886, |
|
"grad_norm": 0.00346226803958416, |
|
"learning_rate": 5.7383966244725745e-06, |
|
"loss": 0.0001, |
|
"step": 3820 |
|
}, |
|
{ |
|
"epoch": 47.00886075949367, |
|
"grad_norm": 0.024175411090254784, |
|
"learning_rate": 5.724331926863573e-06, |
|
"loss": 0.0193, |
|
"step": 3830 |
|
}, |
|
{ |
|
"epoch": 47.01012658227848, |
|
"grad_norm": 0.03984224796295166, |
|
"learning_rate": 5.710267229254571e-06, |
|
"loss": 0.0001, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 47.01012658227848, |
|
"eval_accuracy": 0.5319148936170213, |
|
"eval_loss": 4.070488929748535, |
|
"eval_runtime": 8.5074, |
|
"eval_samples_per_second": 5.525, |
|
"eval_steps_per_second": 1.411, |
|
"step": 3840 |
|
}, |
|
{ |
|
"epoch": 48.00126582278481, |
|
"grad_norm": 0.012625842355191708, |
|
"learning_rate": 5.69620253164557e-06, |
|
"loss": 0.0218, |
|
"step": 3850 |
|
}, |
|
{ |
|
"epoch": 48.00253164556962, |
|
"grad_norm": 0.008255310356616974, |
|
"learning_rate": 5.682137834036568e-06, |
|
"loss": 0.012, |
|
"step": 3860 |
|
}, |
|
{ |
|
"epoch": 48.00379746835443, |
|
"grad_norm": 0.019036876037716866, |
|
"learning_rate": 5.668073136427568e-06, |
|
"loss": 0.001, |
|
"step": 3870 |
|
}, |
|
{ |
|
"epoch": 48.00506329113924, |
|
"grad_norm": 0.013268685899674892, |
|
"learning_rate": 5.654008438818566e-06, |
|
"loss": 0.0002, |
|
"step": 3880 |
|
}, |
|
{ |
|
"epoch": 48.00632911392405, |
|
"grad_norm": 0.01589319296181202, |
|
"learning_rate": 5.639943741209565e-06, |
|
"loss": 0.0756, |
|
"step": 3890 |
|
}, |
|
{ |
|
"epoch": 48.00759493670886, |
|
"grad_norm": 0.0036217246670275927, |
|
"learning_rate": 5.6258790436005626e-06, |
|
"loss": 0.1435, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 48.00886075949367, |
|
"grad_norm": 0.002351459814235568, |
|
"learning_rate": 5.611814345991562e-06, |
|
"loss": 0.0001, |
|
"step": 3910 |
|
}, |
|
{ |
|
"epoch": 48.01012658227848, |
|
"grad_norm": 0.0023628135677427053, |
|
"learning_rate": 5.59774964838256e-06, |
|
"loss": 0.0001, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 48.01012658227848, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.55861234664917, |
|
"eval_runtime": 8.4995, |
|
"eval_samples_per_second": 5.53, |
|
"eval_steps_per_second": 1.412, |
|
"step": 3920 |
|
}, |
|
{ |
|
"epoch": 49.00126582278481, |
|
"grad_norm": 0.003683815710246563, |
|
"learning_rate": 5.583684950773559e-06, |
|
"loss": 0.0001, |
|
"step": 3930 |
|
}, |
|
{ |
|
"epoch": 49.00253164556962, |
|
"grad_norm": 0.004656449891626835, |
|
"learning_rate": 5.569620253164557e-06, |
|
"loss": 0.0001, |
|
"step": 3940 |
|
}, |
|
{ |
|
"epoch": 49.00379746835443, |
|
"grad_norm": 0.012214281596243382, |
|
"learning_rate": 5.555555555555557e-06, |
|
"loss": 0.0052, |
|
"step": 3950 |
|
}, |
|
{ |
|
"epoch": 49.00506329113924, |
|
"grad_norm": 0.009479358792304993, |
|
"learning_rate": 5.541490857946555e-06, |
|
"loss": 0.012, |
|
"step": 3960 |
|
}, |
|
{ |
|
"epoch": 49.00632911392405, |
|
"grad_norm": 0.008819793350994587, |
|
"learning_rate": 5.5274261603375535e-06, |
|
"loss": 0.0001, |
|
"step": 3970 |
|
}, |
|
{ |
|
"epoch": 49.00759493670886, |
|
"grad_norm": 0.04174829646945, |
|
"learning_rate": 5.5133614627285514e-06, |
|
"loss": 0.0001, |
|
"step": 3980 |
|
}, |
|
{ |
|
"epoch": 49.00886075949367, |
|
"grad_norm": 0.0032157686073333025, |
|
"learning_rate": 5.49929676511955e-06, |
|
"loss": 0.1845, |
|
"step": 3990 |
|
}, |
|
{ |
|
"epoch": 49.01012658227848, |
|
"grad_norm": 0.010618672706186771, |
|
"learning_rate": 5.485232067510548e-06, |
|
"loss": 0.0266, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 49.01012658227848, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 4.947876453399658, |
|
"eval_runtime": 8.5288, |
|
"eval_samples_per_second": 5.511, |
|
"eval_steps_per_second": 1.407, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 50.00126582278481, |
|
"grad_norm": 0.056022197008132935, |
|
"learning_rate": 5.471167369901548e-06, |
|
"loss": 0.2505, |
|
"step": 4010 |
|
}, |
|
{ |
|
"epoch": 50.00253164556962, |
|
"grad_norm": 0.004495659377425909, |
|
"learning_rate": 5.457102672292546e-06, |
|
"loss": 0.053, |
|
"step": 4020 |
|
}, |
|
{ |
|
"epoch": 50.00379746835443, |
|
"grad_norm": 0.003035302273929119, |
|
"learning_rate": 5.443037974683545e-06, |
|
"loss": 0.0001, |
|
"step": 4030 |
|
}, |
|
{ |
|
"epoch": 50.00506329113924, |
|
"grad_norm": 0.006570629775524139, |
|
"learning_rate": 5.428973277074543e-06, |
|
"loss": 0.0002, |
|
"step": 4040 |
|
}, |
|
{ |
|
"epoch": 50.00632911392405, |
|
"grad_norm": 323.4715881347656, |
|
"learning_rate": 5.414908579465542e-06, |
|
"loss": 0.055, |
|
"step": 4050 |
|
}, |
|
{ |
|
"epoch": 50.00759493670886, |
|
"grad_norm": 0.002824920229613781, |
|
"learning_rate": 5.40084388185654e-06, |
|
"loss": 0.0001, |
|
"step": 4060 |
|
}, |
|
{ |
|
"epoch": 50.00886075949367, |
|
"grad_norm": 0.06357023864984512, |
|
"learning_rate": 5.386779184247539e-06, |
|
"loss": 0.0001, |
|
"step": 4070 |
|
}, |
|
{ |
|
"epoch": 50.01012658227848, |
|
"grad_norm": 0.004729899112135172, |
|
"learning_rate": 5.372714486638537e-06, |
|
"loss": 0.0001, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 50.01012658227848, |
|
"eval_accuracy": 0.44680851063829785, |
|
"eval_loss": 4.32703161239624, |
|
"eval_runtime": 8.4715, |
|
"eval_samples_per_second": 5.548, |
|
"eval_steps_per_second": 1.417, |
|
"step": 4080 |
|
}, |
|
{ |
|
"epoch": 51.00126582278481, |
|
"grad_norm": 0.008333638310432434, |
|
"learning_rate": 5.358649789029536e-06, |
|
"loss": 0.0002, |
|
"step": 4090 |
|
}, |
|
{ |
|
"epoch": 51.00253164556962, |
|
"grad_norm": 0.009458293206989765, |
|
"learning_rate": 5.344585091420535e-06, |
|
"loss": 0.0001, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 51.00379746835443, |
|
"grad_norm": 0.0024418376851826906, |
|
"learning_rate": 5.330520393811534e-06, |
|
"loss": 0.0002, |
|
"step": 4110 |
|
}, |
|
{ |
|
"epoch": 51.00506329113924, |
|
"grad_norm": 0.004669019021093845, |
|
"learning_rate": 5.3164556962025316e-06, |
|
"loss": 0.0001, |
|
"step": 4120 |
|
}, |
|
{ |
|
"epoch": 51.00632911392405, |
|
"grad_norm": 0.003113614860922098, |
|
"learning_rate": 5.30239099859353e-06, |
|
"loss": 0.0012, |
|
"step": 4130 |
|
}, |
|
{ |
|
"epoch": 51.00759493670886, |
|
"grad_norm": 0.003157148603349924, |
|
"learning_rate": 5.28832630098453e-06, |
|
"loss": 0.0001, |
|
"step": 4140 |
|
}, |
|
{ |
|
"epoch": 51.00886075949367, |
|
"grad_norm": 0.004666858818382025, |
|
"learning_rate": 5.274261603375528e-06, |
|
"loss": 0.1445, |
|
"step": 4150 |
|
}, |
|
{ |
|
"epoch": 51.01012658227848, |
|
"grad_norm": 0.002661221195012331, |
|
"learning_rate": 5.260196905766527e-06, |
|
"loss": 0.1307, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 51.01012658227848, |
|
"eval_accuracy": 0.3829787234042553, |
|
"eval_loss": 4.794792175292969, |
|
"eval_runtime": 8.4848, |
|
"eval_samples_per_second": 5.539, |
|
"eval_steps_per_second": 1.414, |
|
"step": 4160 |
|
}, |
|
{ |
|
"epoch": 52.00126582278481, |
|
"grad_norm": 0.001836895477026701, |
|
"learning_rate": 5.246132208157525e-06, |
|
"loss": 0.0002, |
|
"step": 4170 |
|
}, |
|
{ |
|
"epoch": 52.00253164556962, |
|
"grad_norm": 2.3909878730773926, |
|
"learning_rate": 5.2320675105485245e-06, |
|
"loss": 0.0084, |
|
"step": 4180 |
|
}, |
|
{ |
|
"epoch": 52.00379746835443, |
|
"grad_norm": 0.0022460331674665213, |
|
"learning_rate": 5.2180028129395225e-06, |
|
"loss": 0.0005, |
|
"step": 4190 |
|
}, |
|
{ |
|
"epoch": 52.00506329113924, |
|
"grad_norm": 0.7268118858337402, |
|
"learning_rate": 5.203938115330521e-06, |
|
"loss": 0.0008, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 52.00632911392405, |
|
"grad_norm": 0.0033825428690761328, |
|
"learning_rate": 5.189873417721519e-06, |
|
"loss": 0.0001, |
|
"step": 4210 |
|
}, |
|
{ |
|
"epoch": 52.00759493670886, |
|
"grad_norm": 0.006189883220940828, |
|
"learning_rate": 5.175808720112518e-06, |
|
"loss": 0.0001, |
|
"step": 4220 |
|
}, |
|
{ |
|
"epoch": 52.00886075949367, |
|
"grad_norm": 0.006958500016480684, |
|
"learning_rate": 5.161744022503516e-06, |
|
"loss": 0.0002, |
|
"step": 4230 |
|
}, |
|
{ |
|
"epoch": 52.01012658227848, |
|
"grad_norm": 0.003031873842701316, |
|
"learning_rate": 5.147679324894516e-06, |
|
"loss": 0.0019, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 52.01012658227848, |
|
"eval_accuracy": 0.3617021276595745, |
|
"eval_loss": 4.363827705383301, |
|
"eval_runtime": 8.6439, |
|
"eval_samples_per_second": 5.437, |
|
"eval_steps_per_second": 1.388, |
|
"step": 4240 |
|
}, |
|
{ |
|
"epoch": 53.00126582278481, |
|
"grad_norm": 0.09558191895484924, |
|
"learning_rate": 5.133614627285514e-06, |
|
"loss": 0.0001, |
|
"step": 4250 |
|
}, |
|
{ |
|
"epoch": 53.00253164556962, |
|
"grad_norm": 0.002434414578601718, |
|
"learning_rate": 5.1195499296765125e-06, |
|
"loss": 0.0007, |
|
"step": 4260 |
|
}, |
|
{ |
|
"epoch": 53.00379746835443, |
|
"grad_norm": 0.018281536176800728, |
|
"learning_rate": 5.1054852320675105e-06, |
|
"loss": 0.0004, |
|
"step": 4270 |
|
}, |
|
{ |
|
"epoch": 53.00506329113924, |
|
"grad_norm": 0.003481107298284769, |
|
"learning_rate": 5.09142053445851e-06, |
|
"loss": 0.0001, |
|
"step": 4280 |
|
}, |
|
{ |
|
"epoch": 53.00632911392405, |
|
"grad_norm": 0.0011617491254583001, |
|
"learning_rate": 5.077355836849508e-06, |
|
"loss": 0.0001, |
|
"step": 4290 |
|
}, |
|
{ |
|
"epoch": 53.00759493670886, |
|
"grad_norm": 0.007551996968686581, |
|
"learning_rate": 5.063291139240507e-06, |
|
"loss": 0.0001, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 53.00886075949367, |
|
"grad_norm": 0.003541940590366721, |
|
"learning_rate": 5.049226441631505e-06, |
|
"loss": 0.0001, |
|
"step": 4310 |
|
}, |
|
{ |
|
"epoch": 53.01012658227848, |
|
"grad_norm": 0.0031788817141205072, |
|
"learning_rate": 5.035161744022505e-06, |
|
"loss": 0.0001, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 53.01012658227848, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.586310386657715, |
|
"eval_runtime": 8.4934, |
|
"eval_samples_per_second": 5.534, |
|
"eval_steps_per_second": 1.413, |
|
"step": 4320 |
|
}, |
|
{ |
|
"epoch": 54.00126582278481, |
|
"grad_norm": 0.0024340234231203794, |
|
"learning_rate": 5.021097046413503e-06, |
|
"loss": 0.0001, |
|
"step": 4330 |
|
}, |
|
{ |
|
"epoch": 54.00253164556962, |
|
"grad_norm": 0.0034480541944503784, |
|
"learning_rate": 5.007032348804501e-06, |
|
"loss": 0.0001, |
|
"step": 4340 |
|
}, |
|
{ |
|
"epoch": 54.00379746835443, |
|
"grad_norm": 0.0023180190473794937, |
|
"learning_rate": 4.9929676511955e-06, |
|
"loss": 0.0001, |
|
"step": 4350 |
|
}, |
|
{ |
|
"epoch": 54.00506329113924, |
|
"grad_norm": 0.0015061123995110393, |
|
"learning_rate": 4.978902953586498e-06, |
|
"loss": 0.0001, |
|
"step": 4360 |
|
}, |
|
{ |
|
"epoch": 54.00632911392405, |
|
"grad_norm": 0.013990904204547405, |
|
"learning_rate": 4.964838255977497e-06, |
|
"loss": 0.0001, |
|
"step": 4370 |
|
}, |
|
{ |
|
"epoch": 54.00759493670886, |
|
"grad_norm": 0.0013285009190440178, |
|
"learning_rate": 4.950773558368496e-06, |
|
"loss": 0.0001, |
|
"step": 4380 |
|
}, |
|
{ |
|
"epoch": 54.00886075949367, |
|
"grad_norm": 0.00343449623323977, |
|
"learning_rate": 4.936708860759495e-06, |
|
"loss": 0.0001, |
|
"step": 4390 |
|
}, |
|
{ |
|
"epoch": 54.01012658227848, |
|
"grad_norm": 0.003100321162492037, |
|
"learning_rate": 4.922644163150493e-06, |
|
"loss": 0.0001, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 54.01012658227848, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.737309455871582, |
|
"eval_runtime": 8.4983, |
|
"eval_samples_per_second": 5.53, |
|
"eval_steps_per_second": 1.412, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 55.00126582278481, |
|
"grad_norm": 0.0022656081710010767, |
|
"learning_rate": 4.9085794655414915e-06, |
|
"loss": 0.0001, |
|
"step": 4410 |
|
}, |
|
{ |
|
"epoch": 55.00253164556962, |
|
"grad_norm": 0.002674259478226304, |
|
"learning_rate": 4.89451476793249e-06, |
|
"loss": 0.0001, |
|
"step": 4420 |
|
}, |
|
{ |
|
"epoch": 55.00379746835443, |
|
"grad_norm": 0.0027046040631830692, |
|
"learning_rate": 4.880450070323488e-06, |
|
"loss": 0.0024, |
|
"step": 4430 |
|
}, |
|
{ |
|
"epoch": 55.00506329113924, |
|
"grad_norm": 0.003956619184464216, |
|
"learning_rate": 4.866385372714487e-06, |
|
"loss": 0.0001, |
|
"step": 4440 |
|
}, |
|
{ |
|
"epoch": 55.00632911392405, |
|
"grad_norm": 0.03139903396368027, |
|
"learning_rate": 4.852320675105486e-06, |
|
"loss": 0.0742, |
|
"step": 4450 |
|
}, |
|
{ |
|
"epoch": 55.00759493670886, |
|
"grad_norm": 0.00574122928082943, |
|
"learning_rate": 4.838255977496485e-06, |
|
"loss": 0.2666, |
|
"step": 4460 |
|
}, |
|
{ |
|
"epoch": 55.00886075949367, |
|
"grad_norm": 0.012300165370106697, |
|
"learning_rate": 4.824191279887483e-06, |
|
"loss": 0.0001, |
|
"step": 4470 |
|
}, |
|
{ |
|
"epoch": 55.01012658227848, |
|
"grad_norm": 5.114750385284424, |
|
"learning_rate": 4.8101265822784815e-06, |
|
"loss": 0.0006, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 55.01012658227848, |
|
"eval_accuracy": 0.44680851063829785, |
|
"eval_loss": 3.9066474437713623, |
|
"eval_runtime": 8.5347, |
|
"eval_samples_per_second": 5.507, |
|
"eval_steps_per_second": 1.406, |
|
"step": 4480 |
|
}, |
|
{ |
|
"epoch": 56.00126582278481, |
|
"grad_norm": 11.49494457244873, |
|
"learning_rate": 4.79606188466948e-06, |
|
"loss": 0.0011, |
|
"step": 4490 |
|
}, |
|
{ |
|
"epoch": 56.00253164556962, |
|
"grad_norm": 0.005774380173534155, |
|
"learning_rate": 4.781997187060478e-06, |
|
"loss": 0.0001, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 56.00379746835443, |
|
"grad_norm": 0.006357602309435606, |
|
"learning_rate": 4.767932489451477e-06, |
|
"loss": 0.0001, |
|
"step": 4510 |
|
}, |
|
{ |
|
"epoch": 56.00506329113924, |
|
"grad_norm": 0.002659859601408243, |
|
"learning_rate": 4.753867791842476e-06, |
|
"loss": 0.0017, |
|
"step": 4520 |
|
}, |
|
{ |
|
"epoch": 56.00632911392405, |
|
"grad_norm": 0.013889423571527004, |
|
"learning_rate": 4.739803094233475e-06, |
|
"loss": 0.0001, |
|
"step": 4530 |
|
}, |
|
{ |
|
"epoch": 56.00759493670886, |
|
"grad_norm": 0.002410717075690627, |
|
"learning_rate": 4.725738396624473e-06, |
|
"loss": 0.0001, |
|
"step": 4540 |
|
}, |
|
{ |
|
"epoch": 56.00886075949367, |
|
"grad_norm": 0.0023062098771333694, |
|
"learning_rate": 4.711673699015472e-06, |
|
"loss": 0.0001, |
|
"step": 4550 |
|
}, |
|
{ |
|
"epoch": 56.01012658227848, |
|
"grad_norm": 0.0023318820167332888, |
|
"learning_rate": 4.6976090014064704e-06, |
|
"loss": 0.0001, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 56.01012658227848, |
|
"eval_accuracy": 0.46808510638297873, |
|
"eval_loss": 4.031365394592285, |
|
"eval_runtime": 8.457, |
|
"eval_samples_per_second": 5.558, |
|
"eval_steps_per_second": 1.419, |
|
"step": 4560 |
|
}, |
|
{ |
|
"epoch": 57.00126582278481, |
|
"grad_norm": 0.004980940837413073, |
|
"learning_rate": 4.683544303797468e-06, |
|
"loss": 0.0001, |
|
"step": 4570 |
|
}, |
|
{ |
|
"epoch": 57.00253164556962, |
|
"grad_norm": 0.0019251375924795866, |
|
"learning_rate": 4.669479606188467e-06, |
|
"loss": 0.0001, |
|
"step": 4580 |
|
}, |
|
{ |
|
"epoch": 57.00379746835443, |
|
"grad_norm": 0.0028012413531541824, |
|
"learning_rate": 4.655414908579466e-06, |
|
"loss": 0.0174, |
|
"step": 4590 |
|
}, |
|
{ |
|
"epoch": 57.00506329113924, |
|
"grad_norm": 143.03387451171875, |
|
"learning_rate": 4.641350210970465e-06, |
|
"loss": 0.0119, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 57.00632911392405, |
|
"grad_norm": 0.003186359303072095, |
|
"learning_rate": 4.627285513361463e-06, |
|
"loss": 0.0001, |
|
"step": 4610 |
|
}, |
|
{ |
|
"epoch": 57.00759493670886, |
|
"grad_norm": 0.11152984201908112, |
|
"learning_rate": 4.613220815752462e-06, |
|
"loss": 0.0037, |
|
"step": 4620 |
|
}, |
|
{ |
|
"epoch": 57.00886075949367, |
|
"grad_norm": 0.001349453697912395, |
|
"learning_rate": 4.5991561181434605e-06, |
|
"loss": 0.1545, |
|
"step": 4630 |
|
}, |
|
{ |
|
"epoch": 57.01012658227848, |
|
"grad_norm": 0.05059582367539406, |
|
"learning_rate": 4.5850914205344585e-06, |
|
"loss": 0.0001, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 57.01012658227848, |
|
"eval_accuracy": 0.5106382978723404, |
|
"eval_loss": 4.058121204376221, |
|
"eval_runtime": 8.491, |
|
"eval_samples_per_second": 5.535, |
|
"eval_steps_per_second": 1.413, |
|
"step": 4640 |
|
}, |
|
{ |
|
"epoch": 58.00126582278481, |
|
"grad_norm": 0.003746249247342348, |
|
"learning_rate": 4.571026722925457e-06, |
|
"loss": 0.0, |
|
"step": 4650 |
|
}, |
|
{ |
|
"epoch": 58.00253164556962, |
|
"grad_norm": 0.0019692752975970507, |
|
"learning_rate": 4.556962025316456e-06, |
|
"loss": 0.0001, |
|
"step": 4660 |
|
}, |
|
{ |
|
"epoch": 58.00379746835443, |
|
"grad_norm": 0.002934554358944297, |
|
"learning_rate": 4.542897327707454e-06, |
|
"loss": 0.0001, |
|
"step": 4670 |
|
}, |
|
{ |
|
"epoch": 58.00506329113924, |
|
"grad_norm": 0.005108493380248547, |
|
"learning_rate": 4.528832630098453e-06, |
|
"loss": 0.0402, |
|
"step": 4680 |
|
}, |
|
{ |
|
"epoch": 58.00632911392405, |
|
"grad_norm": 0.004260794725269079, |
|
"learning_rate": 4.514767932489452e-06, |
|
"loss": 0.0006, |
|
"step": 4690 |
|
}, |
|
{ |
|
"epoch": 58.00759493670886, |
|
"grad_norm": 0.06016235053539276, |
|
"learning_rate": 4.5007032348804506e-06, |
|
"loss": 0.0004, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 58.00886075949367, |
|
"grad_norm": 0.0011827549897134304, |
|
"learning_rate": 4.4866385372714485e-06, |
|
"loss": 0.114, |
|
"step": 4710 |
|
}, |
|
{ |
|
"epoch": 58.01012658227848, |
|
"grad_norm": 0.002215220592916012, |
|
"learning_rate": 4.472573839662447e-06, |
|
"loss": 0.0001, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 58.01012658227848, |
|
"eval_accuracy": 0.3829787234042553, |
|
"eval_loss": 5.004458904266357, |
|
"eval_runtime": 8.4472, |
|
"eval_samples_per_second": 5.564, |
|
"eval_steps_per_second": 1.421, |
|
"step": 4720 |
|
}, |
|
{ |
|
"epoch": 59.00126582278481, |
|
"grad_norm": 0.004406394902616739, |
|
"learning_rate": 4.458509142053446e-06, |
|
"loss": 0.0005, |
|
"step": 4730 |
|
}, |
|
{ |
|
"epoch": 59.00253164556962, |
|
"grad_norm": 0.002640231978148222, |
|
"learning_rate": 4.444444444444444e-06, |
|
"loss": 0.0001, |
|
"step": 4740 |
|
}, |
|
{ |
|
"epoch": 59.00379746835443, |
|
"grad_norm": 0.0029783693607896566, |
|
"learning_rate": 4.430379746835443e-06, |
|
"loss": 0.0001, |
|
"step": 4750 |
|
}, |
|
{ |
|
"epoch": 59.00506329113924, |
|
"grad_norm": 0.013541797176003456, |
|
"learning_rate": 4.416315049226442e-06, |
|
"loss": 0.0004, |
|
"step": 4760 |
|
}, |
|
{ |
|
"epoch": 59.00632911392405, |
|
"grad_norm": 0.0070534199476242065, |
|
"learning_rate": 4.402250351617441e-06, |
|
"loss": 0.0002, |
|
"step": 4770 |
|
}, |
|
{ |
|
"epoch": 59.00759493670886, |
|
"grad_norm": 0.0034858768340200186, |
|
"learning_rate": 4.3881856540084394e-06, |
|
"loss": 0.0001, |
|
"step": 4780 |
|
}, |
|
{ |
|
"epoch": 59.00886075949367, |
|
"grad_norm": 0.00406244769692421, |
|
"learning_rate": 4.374120956399438e-06, |
|
"loss": 0.0, |
|
"step": 4790 |
|
}, |
|
{ |
|
"epoch": 59.01012658227848, |
|
"grad_norm": 0.0017109077889472246, |
|
"learning_rate": 4.360056258790436e-06, |
|
"loss": 0.0001, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 59.01012658227848, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.089483737945557, |
|
"eval_runtime": 8.467, |
|
"eval_samples_per_second": 5.551, |
|
"eval_steps_per_second": 1.417, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 60.00126582278481, |
|
"grad_norm": 0.0024315589107573032, |
|
"learning_rate": 4.345991561181435e-06, |
|
"loss": 0.0, |
|
"step": 4810 |
|
}, |
|
{ |
|
"epoch": 60.00253164556962, |
|
"grad_norm": 0.0012313745683059096, |
|
"learning_rate": 4.331926863572434e-06, |
|
"loss": 0.0001, |
|
"step": 4820 |
|
}, |
|
{ |
|
"epoch": 60.00379746835443, |
|
"grad_norm": 0.0019479021430015564, |
|
"learning_rate": 4.317862165963433e-06, |
|
"loss": 0.0004, |
|
"step": 4830 |
|
}, |
|
{ |
|
"epoch": 60.00506329113924, |
|
"grad_norm": 0.00494040735065937, |
|
"learning_rate": 4.303797468354431e-06, |
|
"loss": 0.0872, |
|
"step": 4840 |
|
}, |
|
{ |
|
"epoch": 60.00632911392405, |
|
"grad_norm": 0.012567605823278427, |
|
"learning_rate": 4.2897327707454295e-06, |
|
"loss": 0.0011, |
|
"step": 4850 |
|
}, |
|
{ |
|
"epoch": 60.00759493670886, |
|
"grad_norm": 0.002357608638703823, |
|
"learning_rate": 4.275668073136428e-06, |
|
"loss": 0.0296, |
|
"step": 4860 |
|
}, |
|
{ |
|
"epoch": 60.00886075949367, |
|
"grad_norm": 0.0030609623063355684, |
|
"learning_rate": 4.261603375527426e-06, |
|
"loss": 0.0243, |
|
"step": 4870 |
|
}, |
|
{ |
|
"epoch": 60.01012658227848, |
|
"grad_norm": 0.0016012099804356694, |
|
"learning_rate": 4.247538677918425e-06, |
|
"loss": 0.0713, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 60.01012658227848, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 5.042915344238281, |
|
"eval_runtime": 8.491, |
|
"eval_samples_per_second": 5.535, |
|
"eval_steps_per_second": 1.413, |
|
"step": 4880 |
|
}, |
|
{ |
|
"epoch": 61.00126582278481, |
|
"grad_norm": 0.004251533187925816, |
|
"learning_rate": 4.233473980309424e-06, |
|
"loss": 0.0766, |
|
"step": 4890 |
|
}, |
|
{ |
|
"epoch": 61.00253164556962, |
|
"grad_norm": 0.0019293460063636303, |
|
"learning_rate": 4.219409282700423e-06, |
|
"loss": 0.0006, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 61.00379746835443, |
|
"grad_norm": 0.004420694895088673, |
|
"learning_rate": 4.205344585091421e-06, |
|
"loss": 0.0567, |
|
"step": 4910 |
|
}, |
|
{ |
|
"epoch": 61.00506329113924, |
|
"grad_norm": 0.2990714907646179, |
|
"learning_rate": 4.1912798874824196e-06, |
|
"loss": 0.0274, |
|
"step": 4920 |
|
}, |
|
{ |
|
"epoch": 61.00632911392405, |
|
"grad_norm": 0.004468689672648907, |
|
"learning_rate": 4.177215189873418e-06, |
|
"loss": 0.0001, |
|
"step": 4930 |
|
}, |
|
{ |
|
"epoch": 61.00759493670886, |
|
"grad_norm": 0.004564017057418823, |
|
"learning_rate": 4.163150492264416e-06, |
|
"loss": 0.2423, |
|
"step": 4940 |
|
}, |
|
{ |
|
"epoch": 61.00886075949367, |
|
"grad_norm": 93.79319763183594, |
|
"learning_rate": 4.149085794655415e-06, |
|
"loss": 0.0517, |
|
"step": 4950 |
|
}, |
|
{ |
|
"epoch": 61.01012658227848, |
|
"grad_norm": 0.004615637473762035, |
|
"learning_rate": 4.135021097046414e-06, |
|
"loss": 0.0017, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 61.01012658227848, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.786965370178223, |
|
"eval_runtime": 8.4752, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 1.416, |
|
"step": 4960 |
|
}, |
|
{ |
|
"epoch": 62.00126582278481, |
|
"grad_norm": 0.061868444085121155, |
|
"learning_rate": 4.120956399437413e-06, |
|
"loss": 0.0001, |
|
"step": 4970 |
|
}, |
|
{ |
|
"epoch": 62.00253164556962, |
|
"grad_norm": 0.006057640537619591, |
|
"learning_rate": 4.106891701828411e-06, |
|
"loss": 0.0013, |
|
"step": 4980 |
|
}, |
|
{ |
|
"epoch": 62.00379746835443, |
|
"grad_norm": 4.027284145355225, |
|
"learning_rate": 4.09282700421941e-06, |
|
"loss": 0.0004, |
|
"step": 4990 |
|
}, |
|
{ |
|
"epoch": 62.00506329113924, |
|
"grad_norm": 0.00944253709167242, |
|
"learning_rate": 4.0787623066104084e-06, |
|
"loss": 0.0001, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 62.00632911392405, |
|
"grad_norm": 0.0035694832913577557, |
|
"learning_rate": 4.064697609001406e-06, |
|
"loss": 0.0001, |
|
"step": 5010 |
|
}, |
|
{ |
|
"epoch": 62.00759493670886, |
|
"grad_norm": 0.0015398276736959815, |
|
"learning_rate": 4.050632911392405e-06, |
|
"loss": 0.0671, |
|
"step": 5020 |
|
}, |
|
{ |
|
"epoch": 62.00886075949367, |
|
"grad_norm": 0.002066017361357808, |
|
"learning_rate": 4.036568213783404e-06, |
|
"loss": 0.0009, |
|
"step": 5030 |
|
}, |
|
{ |
|
"epoch": 62.01012658227848, |
|
"grad_norm": 0.003685934003442526, |
|
"learning_rate": 4.022503516174403e-06, |
|
"loss": 0.0676, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 62.01012658227848, |
|
"eval_accuracy": 0.3829787234042553, |
|
"eval_loss": 5.095708847045898, |
|
"eval_runtime": 8.4925, |
|
"eval_samples_per_second": 5.534, |
|
"eval_steps_per_second": 1.413, |
|
"step": 5040 |
|
}, |
|
{ |
|
"epoch": 63.00126582278481, |
|
"grad_norm": 0.0065714651718735695, |
|
"learning_rate": 4.008438818565401e-06, |
|
"loss": 0.0001, |
|
"step": 5050 |
|
}, |
|
{ |
|
"epoch": 63.00253164556962, |
|
"grad_norm": 0.003956567496061325, |
|
"learning_rate": 3.9943741209564e-06, |
|
"loss": 0.0001, |
|
"step": 5060 |
|
}, |
|
{ |
|
"epoch": 63.00379746835443, |
|
"grad_norm": 0.008157577365636826, |
|
"learning_rate": 3.9803094233473985e-06, |
|
"loss": 0.0, |
|
"step": 5070 |
|
}, |
|
{ |
|
"epoch": 63.00506329113924, |
|
"grad_norm": 0.0031191923189908266, |
|
"learning_rate": 3.9662447257383965e-06, |
|
"loss": 0.0, |
|
"step": 5080 |
|
}, |
|
{ |
|
"epoch": 63.00632911392405, |
|
"grad_norm": 0.0020041377283632755, |
|
"learning_rate": 3.952180028129395e-06, |
|
"loss": 0.0001, |
|
"step": 5090 |
|
}, |
|
{ |
|
"epoch": 63.00759493670886, |
|
"grad_norm": 0.004067094065248966, |
|
"learning_rate": 3.938115330520394e-06, |
|
"loss": 0.0001, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 63.00886075949367, |
|
"grad_norm": 0.0008815817418508232, |
|
"learning_rate": 3.924050632911393e-06, |
|
"loss": 0.0, |
|
"step": 5110 |
|
}, |
|
{ |
|
"epoch": 63.01012658227848, |
|
"grad_norm": 0.008889904245734215, |
|
"learning_rate": 3.909985935302391e-06, |
|
"loss": 0.0, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 63.01012658227848, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 4.606178283691406, |
|
"eval_runtime": 8.459, |
|
"eval_samples_per_second": 5.556, |
|
"eval_steps_per_second": 1.419, |
|
"step": 5120 |
|
}, |
|
{ |
|
"epoch": 64.00126582278482, |
|
"grad_norm": 0.0015669207787141204, |
|
"learning_rate": 3.89592123769339e-06, |
|
"loss": 0.0, |
|
"step": 5130 |
|
}, |
|
{ |
|
"epoch": 64.00253164556962, |
|
"grad_norm": 0.0011807240080088377, |
|
"learning_rate": 3.8818565400843886e-06, |
|
"loss": 0.0, |
|
"step": 5140 |
|
}, |
|
{ |
|
"epoch": 64.00379746835443, |
|
"grad_norm": 0.0015742299146950245, |
|
"learning_rate": 3.8677918424753865e-06, |
|
"loss": 0.0, |
|
"step": 5150 |
|
}, |
|
{ |
|
"epoch": 64.00506329113924, |
|
"grad_norm": 0.004820580128580332, |
|
"learning_rate": 3.853727144866385e-06, |
|
"loss": 0.0, |
|
"step": 5160 |
|
}, |
|
{ |
|
"epoch": 64.00632911392405, |
|
"grad_norm": 0.0032741015311330557, |
|
"learning_rate": 3.839662447257384e-06, |
|
"loss": 0.0008, |
|
"step": 5170 |
|
}, |
|
{ |
|
"epoch": 64.00759493670886, |
|
"grad_norm": 0.0036417359951883554, |
|
"learning_rate": 3.825597749648383e-06, |
|
"loss": 0.0, |
|
"step": 5180 |
|
}, |
|
{ |
|
"epoch": 64.00886075949367, |
|
"grad_norm": 0.008372402749955654, |
|
"learning_rate": 3.811533052039382e-06, |
|
"loss": 0.0001, |
|
"step": 5190 |
|
}, |
|
{ |
|
"epoch": 64.01012658227847, |
|
"grad_norm": 0.003397272201254964, |
|
"learning_rate": 3.7974683544303802e-06, |
|
"loss": 0.0045, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 64.01012658227847, |
|
"eval_accuracy": 0.3829787234042553, |
|
"eval_loss": 5.245887756347656, |
|
"eval_runtime": 8.4656, |
|
"eval_samples_per_second": 5.552, |
|
"eval_steps_per_second": 1.418, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 65.00126582278482, |
|
"grad_norm": 0.0019356166012585163, |
|
"learning_rate": 3.783403656821379e-06, |
|
"loss": 0.0014, |
|
"step": 5210 |
|
}, |
|
{ |
|
"epoch": 65.00253164556962, |
|
"grad_norm": 0.0013496861793100834, |
|
"learning_rate": 3.7693389592123775e-06, |
|
"loss": 0.0267, |
|
"step": 5220 |
|
}, |
|
{ |
|
"epoch": 65.00379746835443, |
|
"grad_norm": 0.04248461872339249, |
|
"learning_rate": 3.755274261603376e-06, |
|
"loss": 0.0002, |
|
"step": 5230 |
|
}, |
|
{ |
|
"epoch": 65.00506329113924, |
|
"grad_norm": 0.004868203774094582, |
|
"learning_rate": 3.7412095639943747e-06, |
|
"loss": 0.002, |
|
"step": 5240 |
|
}, |
|
{ |
|
"epoch": 65.00632911392405, |
|
"grad_norm": 0.0008925410802476108, |
|
"learning_rate": 3.727144866385373e-06, |
|
"loss": 0.0, |
|
"step": 5250 |
|
}, |
|
{ |
|
"epoch": 65.00759493670886, |
|
"grad_norm": 0.0019144342513754964, |
|
"learning_rate": 3.713080168776372e-06, |
|
"loss": 0.0, |
|
"step": 5260 |
|
}, |
|
{ |
|
"epoch": 65.00886075949367, |
|
"grad_norm": 0.001448463648557663, |
|
"learning_rate": 3.6990154711673703e-06, |
|
"loss": 0.0001, |
|
"step": 5270 |
|
}, |
|
{ |
|
"epoch": 65.01012658227847, |
|
"grad_norm": 0.0024687196128070354, |
|
"learning_rate": 3.684950773558369e-06, |
|
"loss": 0.0943, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 65.01012658227847, |
|
"eval_accuracy": 0.3617021276595745, |
|
"eval_loss": 5.0856242179870605, |
|
"eval_runtime": 8.4623, |
|
"eval_samples_per_second": 5.554, |
|
"eval_steps_per_second": 1.418, |
|
"step": 5280 |
|
}, |
|
{ |
|
"epoch": 66.00126582278482, |
|
"grad_norm": 0.0016155457124114037, |
|
"learning_rate": 3.6708860759493675e-06, |
|
"loss": 0.0001, |
|
"step": 5290 |
|
}, |
|
{ |
|
"epoch": 66.00253164556962, |
|
"grad_norm": 0.17640484869480133, |
|
"learning_rate": 3.656821378340366e-06, |
|
"loss": 0.0001, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 66.00379746835443, |
|
"grad_norm": 0.0015737387584522367, |
|
"learning_rate": 3.6427566807313647e-06, |
|
"loss": 0.0, |
|
"step": 5310 |
|
}, |
|
{ |
|
"epoch": 66.00506329113924, |
|
"grad_norm": 0.015487028285861015, |
|
"learning_rate": 3.628691983122363e-06, |
|
"loss": 0.0001, |
|
"step": 5320 |
|
}, |
|
{ |
|
"epoch": 66.00632911392405, |
|
"grad_norm": 0.0009026491898111999, |
|
"learning_rate": 3.614627285513362e-06, |
|
"loss": 0.0143, |
|
"step": 5330 |
|
}, |
|
{ |
|
"epoch": 66.00759493670886, |
|
"grad_norm": 0.0015520367305725813, |
|
"learning_rate": 3.6005625879043604e-06, |
|
"loss": 0.0048, |
|
"step": 5340 |
|
}, |
|
{ |
|
"epoch": 66.00886075949367, |
|
"grad_norm": 0.007421619724482298, |
|
"learning_rate": 3.586497890295359e-06, |
|
"loss": 0.0, |
|
"step": 5350 |
|
}, |
|
{ |
|
"epoch": 66.01012658227847, |
|
"grad_norm": 0.0009921834571287036, |
|
"learning_rate": 3.5724331926863576e-06, |
|
"loss": 0.0002, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 66.01012658227847, |
|
"eval_accuracy": 0.48936170212765956, |
|
"eval_loss": 4.449216365814209, |
|
"eval_runtime": 8.455, |
|
"eval_samples_per_second": 5.559, |
|
"eval_steps_per_second": 1.419, |
|
"step": 5360 |
|
}, |
|
{ |
|
"epoch": 67.00126582278482, |
|
"grad_norm": 0.009655151516199112, |
|
"learning_rate": 3.558368495077356e-06, |
|
"loss": 0.0001, |
|
"step": 5370 |
|
}, |
|
{ |
|
"epoch": 67.00253164556962, |
|
"grad_norm": 0.004027712158858776, |
|
"learning_rate": 3.544303797468355e-06, |
|
"loss": 0.0001, |
|
"step": 5380 |
|
}, |
|
{ |
|
"epoch": 67.00379746835443, |
|
"grad_norm": 0.002234363229945302, |
|
"learning_rate": 3.530239099859353e-06, |
|
"loss": 0.0001, |
|
"step": 5390 |
|
}, |
|
{ |
|
"epoch": 67.00506329113924, |
|
"grad_norm": 0.001890279003418982, |
|
"learning_rate": 3.516174402250352e-06, |
|
"loss": 0.0001, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 67.00632911392405, |
|
"grad_norm": 0.0018905351171270013, |
|
"learning_rate": 3.5021097046413504e-06, |
|
"loss": 0.0, |
|
"step": 5410 |
|
}, |
|
{ |
|
"epoch": 67.00759493670886, |
|
"grad_norm": 0.001428403309546411, |
|
"learning_rate": 3.4880450070323492e-06, |
|
"loss": 0.0001, |
|
"step": 5420 |
|
}, |
|
{ |
|
"epoch": 67.00886075949367, |
|
"grad_norm": 0.001419195905327797, |
|
"learning_rate": 3.4739803094233476e-06, |
|
"loss": 0.0924, |
|
"step": 5430 |
|
}, |
|
{ |
|
"epoch": 67.01012658227847, |
|
"grad_norm": 0.0011965942103415728, |
|
"learning_rate": 3.459915611814346e-06, |
|
"loss": 0.0002, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 67.01012658227847, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 5.17952299118042, |
|
"eval_runtime": 8.4394, |
|
"eval_samples_per_second": 5.569, |
|
"eval_steps_per_second": 1.422, |
|
"step": 5440 |
|
}, |
|
{ |
|
"epoch": 68.00126582278482, |
|
"grad_norm": 0.004659404046833515, |
|
"learning_rate": 3.445850914205345e-06, |
|
"loss": 0.0001, |
|
"step": 5450 |
|
}, |
|
{ |
|
"epoch": 68.00253164556962, |
|
"grad_norm": 282.0872497558594, |
|
"learning_rate": 3.4317862165963433e-06, |
|
"loss": 0.0246, |
|
"step": 5460 |
|
}, |
|
{ |
|
"epoch": 68.00379746835443, |
|
"grad_norm": 0.19597108662128448, |
|
"learning_rate": 3.417721518987342e-06, |
|
"loss": 0.0001, |
|
"step": 5470 |
|
}, |
|
{ |
|
"epoch": 68.00506329113924, |
|
"grad_norm": 0.0020114402286708355, |
|
"learning_rate": 3.4036568213783405e-06, |
|
"loss": 0.0449, |
|
"step": 5480 |
|
}, |
|
{ |
|
"epoch": 68.00632911392405, |
|
"grad_norm": 0.0017703929916024208, |
|
"learning_rate": 3.3895921237693393e-06, |
|
"loss": 0.0001, |
|
"step": 5490 |
|
}, |
|
{ |
|
"epoch": 68.00759493670886, |
|
"grad_norm": 0.005612295586615801, |
|
"learning_rate": 3.3755274261603377e-06, |
|
"loss": 0.0, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 68.00886075949367, |
|
"grad_norm": 0.002703710226342082, |
|
"learning_rate": 3.361462728551336e-06, |
|
"loss": 0.0001, |
|
"step": 5510 |
|
}, |
|
{ |
|
"epoch": 68.01012658227847, |
|
"grad_norm": 0.0033236260060220957, |
|
"learning_rate": 3.347398030942335e-06, |
|
"loss": 0.0007, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 68.01012658227847, |
|
"eval_accuracy": 0.46808510638297873, |
|
"eval_loss": 4.3201751708984375, |
|
"eval_runtime": 8.4512, |
|
"eval_samples_per_second": 5.561, |
|
"eval_steps_per_second": 1.42, |
|
"step": 5520 |
|
}, |
|
{ |
|
"epoch": 69.00126582278482, |
|
"grad_norm": 0.13362517952919006, |
|
"learning_rate": 3.3333333333333333e-06, |
|
"loss": 0.0001, |
|
"step": 5530 |
|
}, |
|
{ |
|
"epoch": 69.00253164556962, |
|
"grad_norm": 0.0022546211257576942, |
|
"learning_rate": 3.319268635724332e-06, |
|
"loss": 0.1001, |
|
"step": 5540 |
|
}, |
|
{ |
|
"epoch": 69.00379746835443, |
|
"grad_norm": 0.002193002263084054, |
|
"learning_rate": 3.3052039381153305e-06, |
|
"loss": 0.0, |
|
"step": 5550 |
|
}, |
|
{ |
|
"epoch": 69.00506329113924, |
|
"grad_norm": 0.0027829715982079506, |
|
"learning_rate": 3.2911392405063294e-06, |
|
"loss": 0.0061, |
|
"step": 5560 |
|
}, |
|
{ |
|
"epoch": 69.00632911392405, |
|
"grad_norm": 0.0031120802741497755, |
|
"learning_rate": 3.2770745428973278e-06, |
|
"loss": 0.0001, |
|
"step": 5570 |
|
}, |
|
{ |
|
"epoch": 69.00759493670886, |
|
"grad_norm": 0.001309010898694396, |
|
"learning_rate": 3.263009845288326e-06, |
|
"loss": 0.0001, |
|
"step": 5580 |
|
}, |
|
{ |
|
"epoch": 69.00886075949367, |
|
"grad_norm": 0.028802473098039627, |
|
"learning_rate": 3.248945147679325e-06, |
|
"loss": 0.004, |
|
"step": 5590 |
|
}, |
|
{ |
|
"epoch": 69.01012658227847, |
|
"grad_norm": 0.0005848377477377653, |
|
"learning_rate": 3.2348804500703242e-06, |
|
"loss": 0.1678, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 69.01012658227847, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 4.868789196014404, |
|
"eval_runtime": 8.4887, |
|
"eval_samples_per_second": 5.537, |
|
"eval_steps_per_second": 1.414, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 70.00126582278482, |
|
"grad_norm": 0.0012933706166222692, |
|
"learning_rate": 3.2208157524613226e-06, |
|
"loss": 0.0002, |
|
"step": 5610 |
|
}, |
|
{ |
|
"epoch": 70.00253164556962, |
|
"grad_norm": 0.02926361933350563, |
|
"learning_rate": 3.206751054852321e-06, |
|
"loss": 0.0041, |
|
"step": 5620 |
|
}, |
|
{ |
|
"epoch": 70.00379746835443, |
|
"grad_norm": 0.001349663594737649, |
|
"learning_rate": 3.19268635724332e-06, |
|
"loss": 0.0003, |
|
"step": 5630 |
|
}, |
|
{ |
|
"epoch": 70.00506329113924, |
|
"grad_norm": 0.002003163332119584, |
|
"learning_rate": 3.1786216596343183e-06, |
|
"loss": 0.0, |
|
"step": 5640 |
|
}, |
|
{ |
|
"epoch": 70.00632911392405, |
|
"grad_norm": 0.011114409193396568, |
|
"learning_rate": 3.164556962025317e-06, |
|
"loss": 0.0001, |
|
"step": 5650 |
|
}, |
|
{ |
|
"epoch": 70.00759493670886, |
|
"grad_norm": 0.004937044810503721, |
|
"learning_rate": 3.1504922644163155e-06, |
|
"loss": 0.0, |
|
"step": 5660 |
|
}, |
|
{ |
|
"epoch": 70.00886075949367, |
|
"grad_norm": 0.01026509702205658, |
|
"learning_rate": 3.1364275668073143e-06, |
|
"loss": 0.0001, |
|
"step": 5670 |
|
}, |
|
{ |
|
"epoch": 70.01012658227847, |
|
"grad_norm": 0.00430481368675828, |
|
"learning_rate": 3.1223628691983127e-06, |
|
"loss": 0.0001, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 70.01012658227847, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 5.288034439086914, |
|
"eval_runtime": 8.477, |
|
"eval_samples_per_second": 5.544, |
|
"eval_steps_per_second": 1.416, |
|
"step": 5680 |
|
}, |
|
{ |
|
"epoch": 71.00126582278482, |
|
"grad_norm": 0.0036116482224315405, |
|
"learning_rate": 3.108298171589311e-06, |
|
"loss": 0.0, |
|
"step": 5690 |
|
}, |
|
{ |
|
"epoch": 71.00253164556962, |
|
"grad_norm": 0.015287871472537518, |
|
"learning_rate": 3.09423347398031e-06, |
|
"loss": 0.0001, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 71.00379746835443, |
|
"grad_norm": 0.008669455535709858, |
|
"learning_rate": 3.0801687763713083e-06, |
|
"loss": 0.0, |
|
"step": 5710 |
|
}, |
|
{ |
|
"epoch": 71.00506329113924, |
|
"grad_norm": 0.019757656380534172, |
|
"learning_rate": 3.066104078762307e-06, |
|
"loss": 0.0001, |
|
"step": 5720 |
|
}, |
|
{ |
|
"epoch": 71.00632911392405, |
|
"grad_norm": 0.012890863232314587, |
|
"learning_rate": 3.0520393811533055e-06, |
|
"loss": 0.0, |
|
"step": 5730 |
|
}, |
|
{ |
|
"epoch": 71.00759493670886, |
|
"grad_norm": 0.0019587704446166754, |
|
"learning_rate": 3.037974683544304e-06, |
|
"loss": 0.0, |
|
"step": 5740 |
|
}, |
|
{ |
|
"epoch": 71.00886075949367, |
|
"grad_norm": 0.0018708609277382493, |
|
"learning_rate": 3.0239099859353028e-06, |
|
"loss": 0.0, |
|
"step": 5750 |
|
}, |
|
{ |
|
"epoch": 71.01012658227847, |
|
"grad_norm": 0.003253462491557002, |
|
"learning_rate": 3.009845288326301e-06, |
|
"loss": 0.0, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 71.01012658227847, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 5.115118503570557, |
|
"eval_runtime": 8.4566, |
|
"eval_samples_per_second": 5.558, |
|
"eval_steps_per_second": 1.419, |
|
"step": 5760 |
|
}, |
|
{ |
|
"epoch": 72.00126582278482, |
|
"grad_norm": 0.001443624496459961, |
|
"learning_rate": 2.9957805907173e-06, |
|
"loss": 0.0, |
|
"step": 5770 |
|
}, |
|
{ |
|
"epoch": 72.00253164556962, |
|
"grad_norm": 0.0025624725967645645, |
|
"learning_rate": 2.9817158931082984e-06, |
|
"loss": 0.0026, |
|
"step": 5780 |
|
}, |
|
{ |
|
"epoch": 72.00379746835443, |
|
"grad_norm": 0.00680403271690011, |
|
"learning_rate": 2.967651195499297e-06, |
|
"loss": 0.0, |
|
"step": 5790 |
|
}, |
|
{ |
|
"epoch": 72.00506329113924, |
|
"grad_norm": 0.0030975525733083487, |
|
"learning_rate": 2.9535864978902956e-06, |
|
"loss": 0.0, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 72.00632911392405, |
|
"grad_norm": 0.019320061430335045, |
|
"learning_rate": 2.939521800281294e-06, |
|
"loss": 0.2163, |
|
"step": 5810 |
|
}, |
|
{ |
|
"epoch": 72.00759493670886, |
|
"grad_norm": 0.07163013517856598, |
|
"learning_rate": 2.925457102672293e-06, |
|
"loss": 0.0001, |
|
"step": 5820 |
|
}, |
|
{ |
|
"epoch": 72.00886075949367, |
|
"grad_norm": 0.0038794104475528, |
|
"learning_rate": 2.9113924050632912e-06, |
|
"loss": 0.1065, |
|
"step": 5830 |
|
}, |
|
{ |
|
"epoch": 72.01012658227847, |
|
"grad_norm": 0.0027189133688807487, |
|
"learning_rate": 2.89732770745429e-06, |
|
"loss": 0.0005, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 72.01012658227847, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.566655158996582, |
|
"eval_runtime": 8.4516, |
|
"eval_samples_per_second": 5.561, |
|
"eval_steps_per_second": 1.42, |
|
"step": 5840 |
|
}, |
|
{ |
|
"epoch": 73.00126582278482, |
|
"grad_norm": 0.0017171819927170873, |
|
"learning_rate": 2.8832630098452884e-06, |
|
"loss": 0.0001, |
|
"step": 5850 |
|
}, |
|
{ |
|
"epoch": 73.00253164556962, |
|
"grad_norm": 0.0033329855650663376, |
|
"learning_rate": 2.8691983122362873e-06, |
|
"loss": 0.0, |
|
"step": 5860 |
|
}, |
|
{ |
|
"epoch": 73.00379746835443, |
|
"grad_norm": 0.008366705849766731, |
|
"learning_rate": 2.8551336146272857e-06, |
|
"loss": 0.0001, |
|
"step": 5870 |
|
}, |
|
{ |
|
"epoch": 73.00506329113924, |
|
"grad_norm": 0.0013916816096752882, |
|
"learning_rate": 2.841068917018284e-06, |
|
"loss": 0.0001, |
|
"step": 5880 |
|
}, |
|
{ |
|
"epoch": 73.00632911392405, |
|
"grad_norm": 0.001828556414693594, |
|
"learning_rate": 2.827004219409283e-06, |
|
"loss": 0.0, |
|
"step": 5890 |
|
}, |
|
{ |
|
"epoch": 73.00759493670886, |
|
"grad_norm": 0.002120325807482004, |
|
"learning_rate": 2.8129395218002813e-06, |
|
"loss": 0.0, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 73.00886075949367, |
|
"grad_norm": 0.0018465804168954492, |
|
"learning_rate": 2.79887482419128e-06, |
|
"loss": 0.0, |
|
"step": 5910 |
|
}, |
|
{ |
|
"epoch": 73.01012658227847, |
|
"grad_norm": 0.0017947384621948004, |
|
"learning_rate": 2.7848101265822785e-06, |
|
"loss": 0.0, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 73.01012658227847, |
|
"eval_accuracy": 0.46808510638297873, |
|
"eval_loss": 4.288333415985107, |
|
"eval_runtime": 8.4999, |
|
"eval_samples_per_second": 5.529, |
|
"eval_steps_per_second": 1.412, |
|
"step": 5920 |
|
}, |
|
{ |
|
"epoch": 74.00126582278482, |
|
"grad_norm": 0.0011507548624649644, |
|
"learning_rate": 2.7707454289732773e-06, |
|
"loss": 0.0001, |
|
"step": 5930 |
|
}, |
|
{ |
|
"epoch": 74.00253164556962, |
|
"grad_norm": 0.001057163462974131, |
|
"learning_rate": 2.7566807313642757e-06, |
|
"loss": 0.0002, |
|
"step": 5940 |
|
}, |
|
{ |
|
"epoch": 74.00379746835443, |
|
"grad_norm": 0.001940654474310577, |
|
"learning_rate": 2.742616033755274e-06, |
|
"loss": 0.0001, |
|
"step": 5950 |
|
}, |
|
{ |
|
"epoch": 74.00506329113924, |
|
"grad_norm": 0.013309543952345848, |
|
"learning_rate": 2.728551336146273e-06, |
|
"loss": 0.1043, |
|
"step": 5960 |
|
}, |
|
{ |
|
"epoch": 74.00632911392405, |
|
"grad_norm": 0.003933705855160952, |
|
"learning_rate": 2.7144866385372713e-06, |
|
"loss": 0.0, |
|
"step": 5970 |
|
}, |
|
{ |
|
"epoch": 74.00759493670886, |
|
"grad_norm": 0.0016960457433015108, |
|
"learning_rate": 2.70042194092827e-06, |
|
"loss": 0.0011, |
|
"step": 5980 |
|
}, |
|
{ |
|
"epoch": 74.00886075949367, |
|
"grad_norm": 0.0025782466400414705, |
|
"learning_rate": 2.6863572433192686e-06, |
|
"loss": 0.0001, |
|
"step": 5990 |
|
}, |
|
{ |
|
"epoch": 74.01012658227847, |
|
"grad_norm": 0.0025307261385023594, |
|
"learning_rate": 2.6722925457102674e-06, |
|
"loss": 0.0, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 74.01012658227847, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.684779167175293, |
|
"eval_runtime": 8.5153, |
|
"eval_samples_per_second": 5.519, |
|
"eval_steps_per_second": 1.409, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 75.00126582278482, |
|
"grad_norm": 0.0016025023069232702, |
|
"learning_rate": 2.6582278481012658e-06, |
|
"loss": 0.1562, |
|
"step": 6010 |
|
}, |
|
{ |
|
"epoch": 75.00253164556962, |
|
"grad_norm": 0.0006585910450667143, |
|
"learning_rate": 2.644163150492265e-06, |
|
"loss": 0.0001, |
|
"step": 6020 |
|
}, |
|
{ |
|
"epoch": 75.00379746835443, |
|
"grad_norm": 0.0031663491390645504, |
|
"learning_rate": 2.6300984528832634e-06, |
|
"loss": 0.0, |
|
"step": 6030 |
|
}, |
|
{ |
|
"epoch": 75.00506329113924, |
|
"grad_norm": 0.015673287212848663, |
|
"learning_rate": 2.6160337552742622e-06, |
|
"loss": 0.0, |
|
"step": 6040 |
|
}, |
|
{ |
|
"epoch": 75.00632911392405, |
|
"grad_norm": 0.003231970127671957, |
|
"learning_rate": 2.6019690576652606e-06, |
|
"loss": 0.0, |
|
"step": 6050 |
|
}, |
|
{ |
|
"epoch": 75.00759493670886, |
|
"grad_norm": 0.0015489223878830671, |
|
"learning_rate": 2.587904360056259e-06, |
|
"loss": 0.0, |
|
"step": 6060 |
|
}, |
|
{ |
|
"epoch": 75.00886075949367, |
|
"grad_norm": 18.648025512695312, |
|
"learning_rate": 2.573839662447258e-06, |
|
"loss": 0.0019, |
|
"step": 6070 |
|
}, |
|
{ |
|
"epoch": 75.01012658227847, |
|
"grad_norm": 0.00114185712300241, |
|
"learning_rate": 2.5597749648382563e-06, |
|
"loss": 0.0, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 75.01012658227847, |
|
"eval_accuracy": 0.44680851063829785, |
|
"eval_loss": 4.815650939941406, |
|
"eval_runtime": 8.5452, |
|
"eval_samples_per_second": 5.5, |
|
"eval_steps_per_second": 1.404, |
|
"step": 6080 |
|
}, |
|
{ |
|
"epoch": 76.00126582278482, |
|
"grad_norm": 0.20001061260700226, |
|
"learning_rate": 2.545710267229255e-06, |
|
"loss": 0.0001, |
|
"step": 6090 |
|
}, |
|
{ |
|
"epoch": 76.00253164556962, |
|
"grad_norm": 0.002338194055482745, |
|
"learning_rate": 2.5316455696202535e-06, |
|
"loss": 0.0, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 76.00379746835443, |
|
"grad_norm": 0.004149795509874821, |
|
"learning_rate": 2.5175808720112523e-06, |
|
"loss": 0.0, |
|
"step": 6110 |
|
}, |
|
{ |
|
"epoch": 76.00506329113924, |
|
"grad_norm": 0.0017416217597201467, |
|
"learning_rate": 2.5035161744022507e-06, |
|
"loss": 0.0003, |
|
"step": 6120 |
|
}, |
|
{ |
|
"epoch": 76.00632911392405, |
|
"grad_norm": 0.0011654688278213143, |
|
"learning_rate": 2.489451476793249e-06, |
|
"loss": 0.0, |
|
"step": 6130 |
|
}, |
|
{ |
|
"epoch": 76.00759493670886, |
|
"grad_norm": 0.0034060347825288773, |
|
"learning_rate": 2.475386779184248e-06, |
|
"loss": 0.0, |
|
"step": 6140 |
|
}, |
|
{ |
|
"epoch": 76.00886075949367, |
|
"grad_norm": 0.0008188936626538634, |
|
"learning_rate": 2.4613220815752463e-06, |
|
"loss": 0.0, |
|
"step": 6150 |
|
}, |
|
{ |
|
"epoch": 76.01012658227847, |
|
"grad_norm": 0.0015720352530479431, |
|
"learning_rate": 2.447257383966245e-06, |
|
"loss": 0.0, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 76.01012658227847, |
|
"eval_accuracy": 0.44680851063829785, |
|
"eval_loss": 4.8248186111450195, |
|
"eval_runtime": 8.5505, |
|
"eval_samples_per_second": 5.497, |
|
"eval_steps_per_second": 1.403, |
|
"step": 6160 |
|
}, |
|
{ |
|
"epoch": 77.00126582278482, |
|
"grad_norm": 0.007548962719738483, |
|
"learning_rate": 2.4331926863572436e-06, |
|
"loss": 0.0, |
|
"step": 6170 |
|
}, |
|
{ |
|
"epoch": 77.00253164556962, |
|
"grad_norm": 0.00578302051872015, |
|
"learning_rate": 2.4191279887482424e-06, |
|
"loss": 0.0, |
|
"step": 6180 |
|
}, |
|
{ |
|
"epoch": 77.00379746835443, |
|
"grad_norm": 0.0033245827071368694, |
|
"learning_rate": 2.4050632911392408e-06, |
|
"loss": 0.0, |
|
"step": 6190 |
|
}, |
|
{ |
|
"epoch": 77.00506329113924, |
|
"grad_norm": 0.0008535035303793848, |
|
"learning_rate": 2.390998593530239e-06, |
|
"loss": 0.0015, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 77.00632911392405, |
|
"grad_norm": 0.0016984603134915233, |
|
"learning_rate": 2.376933895921238e-06, |
|
"loss": 0.0001, |
|
"step": 6210 |
|
}, |
|
{ |
|
"epoch": 77.00759493670886, |
|
"grad_norm": 0.0014544576406478882, |
|
"learning_rate": 2.3628691983122364e-06, |
|
"loss": 0.0045, |
|
"step": 6220 |
|
}, |
|
{ |
|
"epoch": 77.00886075949367, |
|
"grad_norm": 0.0007344476762227714, |
|
"learning_rate": 2.3488045007032352e-06, |
|
"loss": 0.0, |
|
"step": 6230 |
|
}, |
|
{ |
|
"epoch": 77.01012658227847, |
|
"grad_norm": 0.014698284678161144, |
|
"learning_rate": 2.3347398030942336e-06, |
|
"loss": 0.0, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 77.01012658227847, |
|
"eval_accuracy": 0.48936170212765956, |
|
"eval_loss": 4.5635786056518555, |
|
"eval_runtime": 8.4512, |
|
"eval_samples_per_second": 5.561, |
|
"eval_steps_per_second": 1.42, |
|
"step": 6240 |
|
}, |
|
{ |
|
"epoch": 78.00126582278482, |
|
"grad_norm": 0.0009909283835440874, |
|
"learning_rate": 2.3206751054852324e-06, |
|
"loss": 0.0019, |
|
"step": 6250 |
|
}, |
|
{ |
|
"epoch": 78.00253164556962, |
|
"grad_norm": 0.020323097705841064, |
|
"learning_rate": 2.306610407876231e-06, |
|
"loss": 0.0, |
|
"step": 6260 |
|
}, |
|
{ |
|
"epoch": 78.00379746835443, |
|
"grad_norm": 0.0027961665764451027, |
|
"learning_rate": 2.2925457102672292e-06, |
|
"loss": 0.0001, |
|
"step": 6270 |
|
}, |
|
{ |
|
"epoch": 78.00506329113924, |
|
"grad_norm": 0.0009632346336729825, |
|
"learning_rate": 2.278481012658228e-06, |
|
"loss": 0.0, |
|
"step": 6280 |
|
}, |
|
{ |
|
"epoch": 78.00632911392405, |
|
"grad_norm": 0.005322067067027092, |
|
"learning_rate": 2.2644163150492265e-06, |
|
"loss": 0.1958, |
|
"step": 6290 |
|
}, |
|
{ |
|
"epoch": 78.00759493670886, |
|
"grad_norm": 0.002847396768629551, |
|
"learning_rate": 2.2503516174402253e-06, |
|
"loss": 0.0, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 78.00886075949367, |
|
"grad_norm": 0.002737447852268815, |
|
"learning_rate": 2.2362869198312237e-06, |
|
"loss": 0.0, |
|
"step": 6310 |
|
}, |
|
{ |
|
"epoch": 78.01012658227847, |
|
"grad_norm": 0.036222778260707855, |
|
"learning_rate": 2.222222222222222e-06, |
|
"loss": 0.0, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 78.01012658227847, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.581666469573975, |
|
"eval_runtime": 8.4752, |
|
"eval_samples_per_second": 5.546, |
|
"eval_steps_per_second": 1.416, |
|
"step": 6320 |
|
}, |
|
{ |
|
"epoch": 79.00126582278482, |
|
"grad_norm": 0.001555649214424193, |
|
"learning_rate": 2.208157524613221e-06, |
|
"loss": 0.0, |
|
"step": 6330 |
|
}, |
|
{ |
|
"epoch": 79.00253164556962, |
|
"grad_norm": 0.0009841909632086754, |
|
"learning_rate": 2.1940928270042197e-06, |
|
"loss": 0.0, |
|
"step": 6340 |
|
}, |
|
{ |
|
"epoch": 79.00379746835443, |
|
"grad_norm": 0.0069242678582668304, |
|
"learning_rate": 2.180028129395218e-06, |
|
"loss": 0.0, |
|
"step": 6350 |
|
}, |
|
{ |
|
"epoch": 79.00506329113924, |
|
"grad_norm": 0.0031804998870939016, |
|
"learning_rate": 2.165963431786217e-06, |
|
"loss": 0.0, |
|
"step": 6360 |
|
}, |
|
{ |
|
"epoch": 79.00632911392405, |
|
"grad_norm": 0.0010907890973612666, |
|
"learning_rate": 2.1518987341772153e-06, |
|
"loss": 0.0, |
|
"step": 6370 |
|
}, |
|
{ |
|
"epoch": 79.00759493670886, |
|
"grad_norm": 0.0008229253580793738, |
|
"learning_rate": 2.137834036568214e-06, |
|
"loss": 0.0001, |
|
"step": 6380 |
|
}, |
|
{ |
|
"epoch": 79.00886075949367, |
|
"grad_norm": 0.004569868091493845, |
|
"learning_rate": 2.1237693389592126e-06, |
|
"loss": 0.0, |
|
"step": 6390 |
|
}, |
|
{ |
|
"epoch": 79.01012658227847, |
|
"grad_norm": 0.0017135925590991974, |
|
"learning_rate": 2.1097046413502114e-06, |
|
"loss": 0.0001, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 79.01012658227847, |
|
"eval_accuracy": 0.3829787234042553, |
|
"eval_loss": 4.774336338043213, |
|
"eval_runtime": 8.469, |
|
"eval_samples_per_second": 5.55, |
|
"eval_steps_per_second": 1.417, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 80.00126582278482, |
|
"grad_norm": 0.0009415835957042873, |
|
"learning_rate": 2.0956399437412098e-06, |
|
"loss": 0.0, |
|
"step": 6410 |
|
}, |
|
{ |
|
"epoch": 80.00253164556962, |
|
"grad_norm": 0.0011497698724269867, |
|
"learning_rate": 2.081575246132208e-06, |
|
"loss": 0.0, |
|
"step": 6420 |
|
}, |
|
{ |
|
"epoch": 80.00379746835443, |
|
"grad_norm": 0.0016221057157963514, |
|
"learning_rate": 2.067510548523207e-06, |
|
"loss": 0.0, |
|
"step": 6430 |
|
}, |
|
{ |
|
"epoch": 80.00506329113924, |
|
"grad_norm": 0.002268084790557623, |
|
"learning_rate": 2.0534458509142054e-06, |
|
"loss": 0.0, |
|
"step": 6440 |
|
}, |
|
{ |
|
"epoch": 80.00632911392405, |
|
"grad_norm": 0.0011354024754837155, |
|
"learning_rate": 2.0393811533052042e-06, |
|
"loss": 0.0, |
|
"step": 6450 |
|
}, |
|
{ |
|
"epoch": 80.00759493670886, |
|
"grad_norm": 0.002358433324843645, |
|
"learning_rate": 2.0253164556962026e-06, |
|
"loss": 0.0004, |
|
"step": 6460 |
|
}, |
|
{ |
|
"epoch": 80.00886075949367, |
|
"grad_norm": 0.0013280572602525353, |
|
"learning_rate": 2.0112517580872014e-06, |
|
"loss": 0.0, |
|
"step": 6470 |
|
}, |
|
{ |
|
"epoch": 80.01012658227847, |
|
"grad_norm": 0.006725861690938473, |
|
"learning_rate": 1.9971870604782e-06, |
|
"loss": 0.0001, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 80.01012658227847, |
|
"eval_accuracy": 0.40425531914893614, |
|
"eval_loss": 4.900009632110596, |
|
"eval_runtime": 8.4183, |
|
"eval_samples_per_second": 5.583, |
|
"eval_steps_per_second": 1.425, |
|
"step": 6480 |
|
}, |
|
{ |
|
"epoch": 81.00126582278482, |
|
"grad_norm": 0.0010699324775487185, |
|
"learning_rate": 1.9831223628691982e-06, |
|
"loss": 0.1826, |
|
"step": 6490 |
|
}, |
|
{ |
|
"epoch": 81.00253164556962, |
|
"grad_norm": 0.006973781157284975, |
|
"learning_rate": 1.969057665260197e-06, |
|
"loss": 0.0, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 81.00379746835443, |
|
"grad_norm": 0.003398946486413479, |
|
"learning_rate": 1.9549929676511955e-06, |
|
"loss": 0.0, |
|
"step": 6510 |
|
}, |
|
{ |
|
"epoch": 81.00506329113924, |
|
"grad_norm": 0.009173160418868065, |
|
"learning_rate": 1.9409282700421943e-06, |
|
"loss": 0.0005, |
|
"step": 6520 |
|
}, |
|
{ |
|
"epoch": 81.00632911392405, |
|
"grad_norm": 0.07392571866512299, |
|
"learning_rate": 1.9268635724331927e-06, |
|
"loss": 0.0361, |
|
"step": 6530 |
|
}, |
|
{ |
|
"epoch": 81.00759493670886, |
|
"grad_norm": 0.001574037130922079, |
|
"learning_rate": 1.9127988748241915e-06, |
|
"loss": 0.0, |
|
"step": 6540 |
|
}, |
|
{ |
|
"epoch": 81.00886075949367, |
|
"grad_norm": 0.0032889668364077806, |
|
"learning_rate": 1.8987341772151901e-06, |
|
"loss": 0.0, |
|
"step": 6550 |
|
}, |
|
{ |
|
"epoch": 81.01012658227847, |
|
"grad_norm": 0.002083021914586425, |
|
"learning_rate": 1.8846694796061887e-06, |
|
"loss": 0.0002, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 81.01012658227847, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.766859531402588, |
|
"eval_runtime": 8.4247, |
|
"eval_samples_per_second": 5.579, |
|
"eval_steps_per_second": 1.424, |
|
"step": 6560 |
|
}, |
|
{ |
|
"epoch": 82.00126582278482, |
|
"grad_norm": 0.001750531722791493, |
|
"learning_rate": 1.8706047819971873e-06, |
|
"loss": 0.0, |
|
"step": 6570 |
|
}, |
|
{ |
|
"epoch": 82.00253164556962, |
|
"grad_norm": 0.0014307881938293576, |
|
"learning_rate": 1.856540084388186e-06, |
|
"loss": 0.0, |
|
"step": 6580 |
|
}, |
|
{ |
|
"epoch": 82.00379746835443, |
|
"grad_norm": 0.0012166056549176574, |
|
"learning_rate": 1.8424753867791846e-06, |
|
"loss": 0.0, |
|
"step": 6590 |
|
}, |
|
{ |
|
"epoch": 82.00506329113924, |
|
"grad_norm": 0.0018584979698061943, |
|
"learning_rate": 1.828410689170183e-06, |
|
"loss": 0.0, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 82.00632911392405, |
|
"grad_norm": 0.0007850803667679429, |
|
"learning_rate": 1.8143459915611816e-06, |
|
"loss": 0.0, |
|
"step": 6610 |
|
}, |
|
{ |
|
"epoch": 82.00759493670886, |
|
"grad_norm": 0.00409714225679636, |
|
"learning_rate": 1.8002812939521802e-06, |
|
"loss": 0.0, |
|
"step": 6620 |
|
}, |
|
{ |
|
"epoch": 82.00886075949367, |
|
"grad_norm": 0.0014620538568124175, |
|
"learning_rate": 1.7862165963431788e-06, |
|
"loss": 0.0, |
|
"step": 6630 |
|
}, |
|
{ |
|
"epoch": 82.01012658227847, |
|
"grad_norm": 0.0011849668808281422, |
|
"learning_rate": 1.7721518987341774e-06, |
|
"loss": 0.0, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 82.01012658227847, |
|
"eval_accuracy": 0.44680851063829785, |
|
"eval_loss": 4.8224687576293945, |
|
"eval_runtime": 8.4754, |
|
"eval_samples_per_second": 5.545, |
|
"eval_steps_per_second": 1.416, |
|
"step": 6640 |
|
}, |
|
{ |
|
"epoch": 83.00126582278482, |
|
"grad_norm": 0.0014004989061504602, |
|
"learning_rate": 1.758087201125176e-06, |
|
"loss": 0.0, |
|
"step": 6650 |
|
}, |
|
{ |
|
"epoch": 83.00253164556962, |
|
"grad_norm": 0.0015136294532567263, |
|
"learning_rate": 1.7440225035161746e-06, |
|
"loss": 0.0, |
|
"step": 6660 |
|
}, |
|
{ |
|
"epoch": 83.00379746835443, |
|
"grad_norm": 0.00371897267177701, |
|
"learning_rate": 1.729957805907173e-06, |
|
"loss": 0.0, |
|
"step": 6670 |
|
}, |
|
{ |
|
"epoch": 83.00506329113924, |
|
"grad_norm": 0.0008714126888662577, |
|
"learning_rate": 1.7158931082981716e-06, |
|
"loss": 0.0, |
|
"step": 6680 |
|
}, |
|
{ |
|
"epoch": 83.00632911392405, |
|
"grad_norm": 0.003846103325486183, |
|
"learning_rate": 1.7018284106891702e-06, |
|
"loss": 0.0, |
|
"step": 6690 |
|
}, |
|
{ |
|
"epoch": 83.00759493670886, |
|
"grad_norm": 0.0013878681929782033, |
|
"learning_rate": 1.6877637130801689e-06, |
|
"loss": 0.0, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 83.00886075949367, |
|
"grad_norm": 0.0011597294360399246, |
|
"learning_rate": 1.6736990154711675e-06, |
|
"loss": 0.0, |
|
"step": 6710 |
|
}, |
|
{ |
|
"epoch": 83.01012658227847, |
|
"grad_norm": 0.0006404958548955619, |
|
"learning_rate": 1.659634317862166e-06, |
|
"loss": 0.0, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 83.01012658227847, |
|
"eval_accuracy": 0.44680851063829785, |
|
"eval_loss": 4.833099842071533, |
|
"eval_runtime": 8.4872, |
|
"eval_samples_per_second": 5.538, |
|
"eval_steps_per_second": 1.414, |
|
"step": 6720 |
|
}, |
|
{ |
|
"epoch": 84.00126582278482, |
|
"grad_norm": 0.0019264252623543143, |
|
"learning_rate": 1.6455696202531647e-06, |
|
"loss": 0.0, |
|
"step": 6730 |
|
}, |
|
{ |
|
"epoch": 84.00253164556962, |
|
"grad_norm": 0.001029732171446085, |
|
"learning_rate": 1.631504922644163e-06, |
|
"loss": 0.0005, |
|
"step": 6740 |
|
}, |
|
{ |
|
"epoch": 84.00379746835443, |
|
"grad_norm": 0.0017922447295859456, |
|
"learning_rate": 1.6174402250351621e-06, |
|
"loss": 0.0, |
|
"step": 6750 |
|
}, |
|
{ |
|
"epoch": 84.00506329113924, |
|
"grad_norm": 0.0012487670173868537, |
|
"learning_rate": 1.6033755274261605e-06, |
|
"loss": 0.0, |
|
"step": 6760 |
|
}, |
|
{ |
|
"epoch": 84.00632911392405, |
|
"grad_norm": 0.0014119717525318265, |
|
"learning_rate": 1.5893108298171591e-06, |
|
"loss": 0.0001, |
|
"step": 6770 |
|
}, |
|
{ |
|
"epoch": 84.00759493670886, |
|
"grad_norm": 0.0006965138600207865, |
|
"learning_rate": 1.5752461322081577e-06, |
|
"loss": 0.0, |
|
"step": 6780 |
|
}, |
|
{ |
|
"epoch": 84.00886075949367, |
|
"grad_norm": 0.0011428669095039368, |
|
"learning_rate": 1.5611814345991563e-06, |
|
"loss": 0.0, |
|
"step": 6790 |
|
}, |
|
{ |
|
"epoch": 84.01012658227847, |
|
"grad_norm": 0.002268004696816206, |
|
"learning_rate": 1.547116736990155e-06, |
|
"loss": 0.0, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 84.01012658227847, |
|
"eval_accuracy": 0.44680851063829785, |
|
"eval_loss": 4.715381145477295, |
|
"eval_runtime": 8.3979, |
|
"eval_samples_per_second": 5.597, |
|
"eval_steps_per_second": 1.429, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 85.00126582278482, |
|
"grad_norm": 0.002592705423012376, |
|
"learning_rate": 1.5330520393811536e-06, |
|
"loss": 0.0, |
|
"step": 6810 |
|
}, |
|
{ |
|
"epoch": 85.00253164556962, |
|
"grad_norm": 0.04101519286632538, |
|
"learning_rate": 1.518987341772152e-06, |
|
"loss": 0.0, |
|
"step": 6820 |
|
}, |
|
{ |
|
"epoch": 85.00379746835443, |
|
"grad_norm": 0.0005958875990472734, |
|
"learning_rate": 1.5049226441631506e-06, |
|
"loss": 0.0, |
|
"step": 6830 |
|
}, |
|
{ |
|
"epoch": 85.00506329113924, |
|
"grad_norm": 0.0025226089637726545, |
|
"learning_rate": 1.4908579465541492e-06, |
|
"loss": 0.0, |
|
"step": 6840 |
|
}, |
|
{ |
|
"epoch": 85.00632911392405, |
|
"grad_norm": 0.0008681220351718366, |
|
"learning_rate": 1.4767932489451478e-06, |
|
"loss": 0.0, |
|
"step": 6850 |
|
}, |
|
{ |
|
"epoch": 85.00759493670886, |
|
"grad_norm": 0.0013401606120169163, |
|
"learning_rate": 1.4627285513361464e-06, |
|
"loss": 0.0, |
|
"step": 6860 |
|
}, |
|
{ |
|
"epoch": 85.00886075949367, |
|
"grad_norm": 0.0010737047996371984, |
|
"learning_rate": 1.448663853727145e-06, |
|
"loss": 0.0, |
|
"step": 6870 |
|
}, |
|
{ |
|
"epoch": 85.01012658227847, |
|
"grad_norm": 0.001385514042340219, |
|
"learning_rate": 1.4345991561181436e-06, |
|
"loss": 0.0, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 85.01012658227847, |
|
"eval_accuracy": 0.44680851063829785, |
|
"eval_loss": 4.716861248016357, |
|
"eval_runtime": 8.4473, |
|
"eval_samples_per_second": 5.564, |
|
"eval_steps_per_second": 1.421, |
|
"step": 6880 |
|
}, |
|
{ |
|
"epoch": 86.00126582278482, |
|
"grad_norm": 0.0020737305749207735, |
|
"learning_rate": 1.420534458509142e-06, |
|
"loss": 0.0, |
|
"step": 6890 |
|
}, |
|
{ |
|
"epoch": 86.00253164556962, |
|
"grad_norm": 0.0013663348508998752, |
|
"learning_rate": 1.4064697609001406e-06, |
|
"loss": 0.0001, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 86.00379746835443, |
|
"grad_norm": 0.01127583533525467, |
|
"learning_rate": 1.3924050632911392e-06, |
|
"loss": 0.0, |
|
"step": 6910 |
|
}, |
|
{ |
|
"epoch": 86.00506329113924, |
|
"grad_norm": 0.002084192121401429, |
|
"learning_rate": 1.3783403656821379e-06, |
|
"loss": 0.0, |
|
"step": 6920 |
|
}, |
|
{ |
|
"epoch": 86.00632911392405, |
|
"grad_norm": 0.0009935207199305296, |
|
"learning_rate": 1.3642756680731365e-06, |
|
"loss": 0.0, |
|
"step": 6930 |
|
}, |
|
{ |
|
"epoch": 86.00759493670886, |
|
"grad_norm": 0.0008619350846856833, |
|
"learning_rate": 1.350210970464135e-06, |
|
"loss": 0.0, |
|
"step": 6940 |
|
}, |
|
{ |
|
"epoch": 86.00886075949367, |
|
"grad_norm": 0.000807570235338062, |
|
"learning_rate": 1.3361462728551337e-06, |
|
"loss": 0.0, |
|
"step": 6950 |
|
}, |
|
{ |
|
"epoch": 86.01012658227847, |
|
"grad_norm": 0.001797909731976688, |
|
"learning_rate": 1.3220815752461325e-06, |
|
"loss": 0.0, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 86.01012658227847, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.900410175323486, |
|
"eval_runtime": 8.4167, |
|
"eval_samples_per_second": 5.584, |
|
"eval_steps_per_second": 1.426, |
|
"step": 6960 |
|
}, |
|
{ |
|
"epoch": 87.00126582278482, |
|
"grad_norm": 0.0008401426021009684, |
|
"learning_rate": 1.3080168776371311e-06, |
|
"loss": 0.0, |
|
"step": 6970 |
|
}, |
|
{ |
|
"epoch": 87.00253164556962, |
|
"grad_norm": 0.0014646663330495358, |
|
"learning_rate": 1.2939521800281295e-06, |
|
"loss": 0.0, |
|
"step": 6980 |
|
}, |
|
{ |
|
"epoch": 87.00379746835443, |
|
"grad_norm": 0.0010633817873895168, |
|
"learning_rate": 1.2798874824191281e-06, |
|
"loss": 0.0, |
|
"step": 6990 |
|
}, |
|
{ |
|
"epoch": 87.00506329113924, |
|
"grad_norm": 0.0017103515565395355, |
|
"learning_rate": 1.2658227848101267e-06, |
|
"loss": 0.0, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 87.00632911392405, |
|
"grad_norm": 0.001976841827854514, |
|
"learning_rate": 1.2517580872011254e-06, |
|
"loss": 0.0, |
|
"step": 7010 |
|
}, |
|
{ |
|
"epoch": 87.00759493670886, |
|
"grad_norm": 0.000657711352687329, |
|
"learning_rate": 1.237693389592124e-06, |
|
"loss": 0.0, |
|
"step": 7020 |
|
}, |
|
{ |
|
"epoch": 87.00886075949367, |
|
"grad_norm": 0.0006206512916833162, |
|
"learning_rate": 1.2236286919831226e-06, |
|
"loss": 0.0, |
|
"step": 7030 |
|
}, |
|
{ |
|
"epoch": 87.01012658227847, |
|
"grad_norm": 0.0030793757177889347, |
|
"learning_rate": 1.2095639943741212e-06, |
|
"loss": 0.0, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 87.01012658227847, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.909188270568848, |
|
"eval_runtime": 8.4164, |
|
"eval_samples_per_second": 5.584, |
|
"eval_steps_per_second": 1.426, |
|
"step": 7040 |
|
}, |
|
{ |
|
"epoch": 88.00126582278482, |
|
"grad_norm": 0.0011876953067258, |
|
"learning_rate": 1.1954992967651196e-06, |
|
"loss": 0.0, |
|
"step": 7050 |
|
}, |
|
{ |
|
"epoch": 88.00253164556962, |
|
"grad_norm": 0.0019371965900063515, |
|
"learning_rate": 1.1814345991561182e-06, |
|
"loss": 0.0, |
|
"step": 7060 |
|
}, |
|
{ |
|
"epoch": 88.00379746835443, |
|
"grad_norm": 0.001290988875553012, |
|
"learning_rate": 1.1673699015471168e-06, |
|
"loss": 0.0, |
|
"step": 7070 |
|
}, |
|
{ |
|
"epoch": 88.00506329113924, |
|
"grad_norm": 0.003862161422148347, |
|
"learning_rate": 1.1533052039381154e-06, |
|
"loss": 0.0, |
|
"step": 7080 |
|
}, |
|
{ |
|
"epoch": 88.00632911392405, |
|
"grad_norm": 0.0007267307373695076, |
|
"learning_rate": 1.139240506329114e-06, |
|
"loss": 0.0, |
|
"step": 7090 |
|
}, |
|
{ |
|
"epoch": 88.00759493670886, |
|
"grad_norm": 0.0007938898052088916, |
|
"learning_rate": 1.1251758087201126e-06, |
|
"loss": 0.0, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 88.00886075949367, |
|
"grad_norm": 0.0006018871208652854, |
|
"learning_rate": 1.111111111111111e-06, |
|
"loss": 0.0, |
|
"step": 7110 |
|
}, |
|
{ |
|
"epoch": 88.01012658227847, |
|
"grad_norm": 0.0017778057372197509, |
|
"learning_rate": 1.0970464135021099e-06, |
|
"loss": 0.0, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 88.01012658227847, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.89414119720459, |
|
"eval_runtime": 8.438, |
|
"eval_samples_per_second": 5.57, |
|
"eval_steps_per_second": 1.422, |
|
"step": 7120 |
|
}, |
|
{ |
|
"epoch": 89.00126582278482, |
|
"grad_norm": 0.0007234832737594843, |
|
"learning_rate": 1.0829817158931085e-06, |
|
"loss": 0.0, |
|
"step": 7130 |
|
}, |
|
{ |
|
"epoch": 89.00253164556962, |
|
"grad_norm": 0.0015409559709951282, |
|
"learning_rate": 1.068917018284107e-06, |
|
"loss": 0.0, |
|
"step": 7140 |
|
}, |
|
{ |
|
"epoch": 89.00379746835443, |
|
"grad_norm": 0.0008910736651159823, |
|
"learning_rate": 1.0548523206751057e-06, |
|
"loss": 0.0, |
|
"step": 7150 |
|
}, |
|
{ |
|
"epoch": 89.00506329113924, |
|
"grad_norm": 0.0020937921945005655, |
|
"learning_rate": 1.040787623066104e-06, |
|
"loss": 0.0, |
|
"step": 7160 |
|
}, |
|
{ |
|
"epoch": 89.00632911392405, |
|
"grad_norm": 0.0014372824225574732, |
|
"learning_rate": 1.0267229254571027e-06, |
|
"loss": 0.0, |
|
"step": 7170 |
|
}, |
|
{ |
|
"epoch": 89.00759493670886, |
|
"grad_norm": 0.003179526887834072, |
|
"learning_rate": 1.0126582278481013e-06, |
|
"loss": 0.0, |
|
"step": 7180 |
|
}, |
|
{ |
|
"epoch": 89.00886075949367, |
|
"grad_norm": 0.0012057056883350015, |
|
"learning_rate": 9.985935302391e-07, |
|
"loss": 0.0001, |
|
"step": 7190 |
|
}, |
|
{ |
|
"epoch": 89.01012658227847, |
|
"grad_norm": 0.005369286518543959, |
|
"learning_rate": 9.845288326300985e-07, |
|
"loss": 0.0, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 89.01012658227847, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.789796352386475, |
|
"eval_runtime": 8.4427, |
|
"eval_samples_per_second": 5.567, |
|
"eval_steps_per_second": 1.421, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 90.00126582278482, |
|
"grad_norm": 0.0017162526492029428, |
|
"learning_rate": 9.704641350210971e-07, |
|
"loss": 0.0, |
|
"step": 7210 |
|
}, |
|
{ |
|
"epoch": 90.00253164556962, |
|
"grad_norm": 0.0009838847909122705, |
|
"learning_rate": 9.563994374120958e-07, |
|
"loss": 0.0, |
|
"step": 7220 |
|
}, |
|
{ |
|
"epoch": 90.00379746835443, |
|
"grad_norm": 0.015449059195816517, |
|
"learning_rate": 9.423347398030944e-07, |
|
"loss": 0.0, |
|
"step": 7230 |
|
}, |
|
{ |
|
"epoch": 90.00506329113924, |
|
"grad_norm": 0.0017991637578234076, |
|
"learning_rate": 9.28270042194093e-07, |
|
"loss": 0.0, |
|
"step": 7240 |
|
}, |
|
{ |
|
"epoch": 90.00632911392405, |
|
"grad_norm": 0.0010769497603178024, |
|
"learning_rate": 9.142053445850915e-07, |
|
"loss": 0.0, |
|
"step": 7250 |
|
}, |
|
{ |
|
"epoch": 90.00759493670886, |
|
"grad_norm": 0.0007212001946754754, |
|
"learning_rate": 9.001406469760901e-07, |
|
"loss": 0.0, |
|
"step": 7260 |
|
}, |
|
{ |
|
"epoch": 90.00886075949367, |
|
"grad_norm": 0.000739375944249332, |
|
"learning_rate": 8.860759493670887e-07, |
|
"loss": 0.0, |
|
"step": 7270 |
|
}, |
|
{ |
|
"epoch": 90.01012658227847, |
|
"grad_norm": 0.002124297898262739, |
|
"learning_rate": 8.720112517580873e-07, |
|
"loss": 0.0, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 90.01012658227847, |
|
"eval_accuracy": 0.44680851063829785, |
|
"eval_loss": 4.827134132385254, |
|
"eval_runtime": 8.4657, |
|
"eval_samples_per_second": 5.552, |
|
"eval_steps_per_second": 1.417, |
|
"step": 7280 |
|
}, |
|
{ |
|
"epoch": 91.00126582278482, |
|
"grad_norm": 0.00743636442348361, |
|
"learning_rate": 8.579465541490858e-07, |
|
"loss": 0.0, |
|
"step": 7290 |
|
}, |
|
{ |
|
"epoch": 91.00253164556962, |
|
"grad_norm": 0.001242807717062533, |
|
"learning_rate": 8.438818565400844e-07, |
|
"loss": 0.0, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 91.00379746835443, |
|
"grad_norm": 0.017530538141727448, |
|
"learning_rate": 8.29817158931083e-07, |
|
"loss": 0.0, |
|
"step": 7310 |
|
}, |
|
{ |
|
"epoch": 91.00506329113924, |
|
"grad_norm": 0.0027876682579517365, |
|
"learning_rate": 8.157524613220815e-07, |
|
"loss": 0.0, |
|
"step": 7320 |
|
}, |
|
{ |
|
"epoch": 91.00632911392405, |
|
"grad_norm": 0.001038099406287074, |
|
"learning_rate": 8.016877637130803e-07, |
|
"loss": 0.0, |
|
"step": 7330 |
|
}, |
|
{ |
|
"epoch": 91.00759493670886, |
|
"grad_norm": 0.0012997939484193921, |
|
"learning_rate": 7.876230661040789e-07, |
|
"loss": 0.0, |
|
"step": 7340 |
|
}, |
|
{ |
|
"epoch": 91.00886075949367, |
|
"grad_norm": 0.00228080153465271, |
|
"learning_rate": 7.735583684950775e-07, |
|
"loss": 0.0, |
|
"step": 7350 |
|
}, |
|
{ |
|
"epoch": 91.01012658227847, |
|
"grad_norm": 0.0014501850819215178, |
|
"learning_rate": 7.59493670886076e-07, |
|
"loss": 0.0, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 91.01012658227847, |
|
"eval_accuracy": 0.44680851063829785, |
|
"eval_loss": 4.832017421722412, |
|
"eval_runtime": 8.6255, |
|
"eval_samples_per_second": 5.449, |
|
"eval_steps_per_second": 1.391, |
|
"step": 7360 |
|
}, |
|
{ |
|
"epoch": 92.00126582278482, |
|
"grad_norm": 0.0007885429658927023, |
|
"learning_rate": 7.454289732770746e-07, |
|
"loss": 0.0, |
|
"step": 7370 |
|
}, |
|
{ |
|
"epoch": 92.00253164556962, |
|
"grad_norm": 0.0009592593996785581, |
|
"learning_rate": 7.313642756680732e-07, |
|
"loss": 0.0, |
|
"step": 7380 |
|
}, |
|
{ |
|
"epoch": 92.00379746835443, |
|
"grad_norm": 0.004812302067875862, |
|
"learning_rate": 7.172995780590718e-07, |
|
"loss": 0.0, |
|
"step": 7390 |
|
}, |
|
{ |
|
"epoch": 92.00506329113924, |
|
"grad_norm": 0.0012065304908901453, |
|
"learning_rate": 7.032348804500703e-07, |
|
"loss": 0.0, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 92.00632911392405, |
|
"grad_norm": 0.0025038751773536205, |
|
"learning_rate": 6.891701828410689e-07, |
|
"loss": 0.0, |
|
"step": 7410 |
|
}, |
|
{ |
|
"epoch": 92.00759493670886, |
|
"grad_norm": 0.0007439328473992646, |
|
"learning_rate": 6.751054852320675e-07, |
|
"loss": 0.0, |
|
"step": 7420 |
|
}, |
|
{ |
|
"epoch": 92.00886075949367, |
|
"grad_norm": 0.040091563016176224, |
|
"learning_rate": 6.610407876230663e-07, |
|
"loss": 0.0, |
|
"step": 7430 |
|
}, |
|
{ |
|
"epoch": 92.01012658227847, |
|
"grad_norm": 0.00362952146679163, |
|
"learning_rate": 6.469760900140648e-07, |
|
"loss": 0.0, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 92.01012658227847, |
|
"eval_accuracy": 0.44680851063829785, |
|
"eval_loss": 4.827417850494385, |
|
"eval_runtime": 8.4772, |
|
"eval_samples_per_second": 5.544, |
|
"eval_steps_per_second": 1.416, |
|
"step": 7440 |
|
}, |
|
{ |
|
"epoch": 93.00126582278482, |
|
"grad_norm": 0.0022241012193262577, |
|
"learning_rate": 6.329113924050634e-07, |
|
"loss": 0.0, |
|
"step": 7450 |
|
}, |
|
{ |
|
"epoch": 93.00253164556962, |
|
"grad_norm": 0.025551510974764824, |
|
"learning_rate": 6.18846694796062e-07, |
|
"loss": 0.0, |
|
"step": 7460 |
|
}, |
|
{ |
|
"epoch": 93.00379746835443, |
|
"grad_norm": 0.00078696379205212, |
|
"learning_rate": 6.047819971870606e-07, |
|
"loss": 0.0, |
|
"step": 7470 |
|
}, |
|
{ |
|
"epoch": 93.00506329113924, |
|
"grad_norm": 0.0017261310713365674, |
|
"learning_rate": 5.907172995780591e-07, |
|
"loss": 0.0, |
|
"step": 7480 |
|
}, |
|
{ |
|
"epoch": 93.00632911392405, |
|
"grad_norm": 0.003345273435115814, |
|
"learning_rate": 5.766526019690577e-07, |
|
"loss": 0.0, |
|
"step": 7490 |
|
}, |
|
{ |
|
"epoch": 93.00759493670886, |
|
"grad_norm": 0.0011764048831537366, |
|
"learning_rate": 5.625879043600563e-07, |
|
"loss": 0.0, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 93.00886075949367, |
|
"grad_norm": 0.0013756465632468462, |
|
"learning_rate": 5.485232067510549e-07, |
|
"loss": 0.0, |
|
"step": 7510 |
|
}, |
|
{ |
|
"epoch": 93.01012658227847, |
|
"grad_norm": 0.0011709831887856126, |
|
"learning_rate": 5.344585091420535e-07, |
|
"loss": 0.0, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 93.01012658227847, |
|
"eval_accuracy": 0.44680851063829785, |
|
"eval_loss": 4.826868057250977, |
|
"eval_runtime": 170.4429, |
|
"eval_samples_per_second": 0.276, |
|
"eval_steps_per_second": 0.07, |
|
"step": 7520 |
|
}, |
|
{ |
|
"epoch": 94.00126582278482, |
|
"grad_norm": 0.002271972130984068, |
|
"learning_rate": 5.20393811533052e-07, |
|
"loss": 0.0001, |
|
"step": 7530 |
|
}, |
|
{ |
|
"epoch": 94.00253164556962, |
|
"grad_norm": 12.584663391113281, |
|
"learning_rate": 5.063291139240507e-07, |
|
"loss": 0.0007, |
|
"step": 7540 |
|
}, |
|
{ |
|
"epoch": 94.00379746835443, |
|
"grad_norm": 0.0010153332259505987, |
|
"learning_rate": 4.922644163150493e-07, |
|
"loss": 0.0, |
|
"step": 7550 |
|
}, |
|
{ |
|
"epoch": 94.00506329113924, |
|
"grad_norm": 0.0019480427727103233, |
|
"learning_rate": 4.781997187060479e-07, |
|
"loss": 0.0, |
|
"step": 7560 |
|
}, |
|
{ |
|
"epoch": 94.00632911392405, |
|
"grad_norm": 0.005996455904096365, |
|
"learning_rate": 4.641350210970465e-07, |
|
"loss": 0.0, |
|
"step": 7570 |
|
}, |
|
{ |
|
"epoch": 94.00759493670886, |
|
"grad_norm": 0.0005868688458576798, |
|
"learning_rate": 4.5007032348804504e-07, |
|
"loss": 0.0, |
|
"step": 7580 |
|
}, |
|
{ |
|
"epoch": 94.00886075949367, |
|
"grad_norm": 0.0008807959966361523, |
|
"learning_rate": 4.3600562587904366e-07, |
|
"loss": 0.0, |
|
"step": 7590 |
|
}, |
|
{ |
|
"epoch": 94.01012658227847, |
|
"grad_norm": 0.0008403842803090811, |
|
"learning_rate": 4.219409282700422e-07, |
|
"loss": 0.0, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 94.01012658227847, |
|
"eval_accuracy": 0.3829787234042553, |
|
"eval_loss": 4.878473281860352, |
|
"eval_runtime": 8.411, |
|
"eval_samples_per_second": 5.588, |
|
"eval_steps_per_second": 1.427, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 95.00126582278482, |
|
"grad_norm": 0.005562290083616972, |
|
"learning_rate": 4.0787623066104077e-07, |
|
"loss": 0.0001, |
|
"step": 7610 |
|
}, |
|
{ |
|
"epoch": 95.00253164556962, |
|
"grad_norm": 0.004410718102008104, |
|
"learning_rate": 3.9381153305203943e-07, |
|
"loss": 0.0, |
|
"step": 7620 |
|
}, |
|
{ |
|
"epoch": 95.00379746835443, |
|
"grad_norm": 0.0042534684762358665, |
|
"learning_rate": 3.79746835443038e-07, |
|
"loss": 0.0, |
|
"step": 7630 |
|
}, |
|
{ |
|
"epoch": 95.00506329113924, |
|
"grad_norm": 0.0012142916675657034, |
|
"learning_rate": 3.656821378340366e-07, |
|
"loss": 0.0, |
|
"step": 7640 |
|
}, |
|
{ |
|
"epoch": 95.00632911392405, |
|
"grad_norm": 0.0007107236888259649, |
|
"learning_rate": 3.5161744022503516e-07, |
|
"loss": 0.0, |
|
"step": 7650 |
|
}, |
|
{ |
|
"epoch": 95.00759493670886, |
|
"grad_norm": 0.0018182717030867934, |
|
"learning_rate": 3.3755274261603377e-07, |
|
"loss": 0.0, |
|
"step": 7660 |
|
}, |
|
{ |
|
"epoch": 95.00886075949367, |
|
"grad_norm": 0.003002484329044819, |
|
"learning_rate": 3.234880450070324e-07, |
|
"loss": 0.0001, |
|
"step": 7670 |
|
}, |
|
{ |
|
"epoch": 95.01012658227847, |
|
"grad_norm": 0.0012020288268104196, |
|
"learning_rate": 3.09423347398031e-07, |
|
"loss": 0.0, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 95.01012658227847, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.963972568511963, |
|
"eval_runtime": 8.4253, |
|
"eval_samples_per_second": 5.578, |
|
"eval_steps_per_second": 1.424, |
|
"step": 7680 |
|
}, |
|
{ |
|
"epoch": 96.00126582278482, |
|
"grad_norm": 0.0024408556055277586, |
|
"learning_rate": 2.9535864978902955e-07, |
|
"loss": 0.0, |
|
"step": 7690 |
|
}, |
|
{ |
|
"epoch": 96.00253164556962, |
|
"grad_norm": 329.71331787109375, |
|
"learning_rate": 2.8129395218002816e-07, |
|
"loss": 0.132, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 96.00379746835443, |
|
"grad_norm": 0.0019197100773453712, |
|
"learning_rate": 2.6722925457102677e-07, |
|
"loss": 0.0005, |
|
"step": 7710 |
|
}, |
|
{ |
|
"epoch": 96.00506329113924, |
|
"grad_norm": 0.0013428219826892018, |
|
"learning_rate": 2.5316455696202533e-07, |
|
"loss": 0.0, |
|
"step": 7720 |
|
}, |
|
{ |
|
"epoch": 96.00632911392405, |
|
"grad_norm": 0.0015475323889404535, |
|
"learning_rate": 2.3909985935302394e-07, |
|
"loss": 0.0, |
|
"step": 7730 |
|
}, |
|
{ |
|
"epoch": 96.00759493670886, |
|
"grad_norm": 0.0013204860733821988, |
|
"learning_rate": 2.2503516174402252e-07, |
|
"loss": 0.0, |
|
"step": 7740 |
|
}, |
|
{ |
|
"epoch": 96.00886075949367, |
|
"grad_norm": 0.0007807817310094833, |
|
"learning_rate": 2.109704641350211e-07, |
|
"loss": 0.0, |
|
"step": 7750 |
|
}, |
|
{ |
|
"epoch": 96.01012658227847, |
|
"grad_norm": 0.0009276365744881332, |
|
"learning_rate": 1.9690576652601972e-07, |
|
"loss": 0.0, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 96.01012658227847, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.947977066040039, |
|
"eval_runtime": 8.4147, |
|
"eval_samples_per_second": 5.585, |
|
"eval_steps_per_second": 1.426, |
|
"step": 7760 |
|
}, |
|
{ |
|
"epoch": 97.00126582278482, |
|
"grad_norm": 0.0013080050703138113, |
|
"learning_rate": 1.828410689170183e-07, |
|
"loss": 0.0, |
|
"step": 7770 |
|
}, |
|
{ |
|
"epoch": 97.00253164556962, |
|
"grad_norm": 0.0018693221500143409, |
|
"learning_rate": 1.6877637130801689e-07, |
|
"loss": 0.0, |
|
"step": 7780 |
|
}, |
|
{ |
|
"epoch": 97.00379746835443, |
|
"grad_norm": 0.0018208841793239117, |
|
"learning_rate": 1.547116736990155e-07, |
|
"loss": 0.0, |
|
"step": 7790 |
|
}, |
|
{ |
|
"epoch": 97.00506329113924, |
|
"grad_norm": 0.0018955061677843332, |
|
"learning_rate": 1.4064697609001408e-07, |
|
"loss": 0.0, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 97.00632911392405, |
|
"grad_norm": 0.0007756951963528991, |
|
"learning_rate": 1.2658227848101266e-07, |
|
"loss": 0.0, |
|
"step": 7810 |
|
}, |
|
{ |
|
"epoch": 97.00759493670886, |
|
"grad_norm": 0.0009716827771626413, |
|
"learning_rate": 1.1251758087201126e-07, |
|
"loss": 0.0, |
|
"step": 7820 |
|
}, |
|
{ |
|
"epoch": 97.00886075949367, |
|
"grad_norm": 0.003705208422616124, |
|
"learning_rate": 9.845288326300986e-08, |
|
"loss": 0.0, |
|
"step": 7830 |
|
}, |
|
{ |
|
"epoch": 97.01012658227847, |
|
"grad_norm": 0.013106240890920162, |
|
"learning_rate": 8.438818565400844e-08, |
|
"loss": 0.0, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 97.01012658227847, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.940354824066162, |
|
"eval_runtime": 8.4561, |
|
"eval_samples_per_second": 5.558, |
|
"eval_steps_per_second": 1.419, |
|
"step": 7840 |
|
}, |
|
{ |
|
"epoch": 98.00126582278482, |
|
"grad_norm": 0.0024029065389186144, |
|
"learning_rate": 7.032348804500704e-08, |
|
"loss": 0.0, |
|
"step": 7850 |
|
}, |
|
{ |
|
"epoch": 98.00253164556962, |
|
"grad_norm": 0.0011833859607577324, |
|
"learning_rate": 5.625879043600563e-08, |
|
"loss": 0.0, |
|
"step": 7860 |
|
}, |
|
{ |
|
"epoch": 98.00379746835443, |
|
"grad_norm": 0.0013356610434129834, |
|
"learning_rate": 4.219409282700422e-08, |
|
"loss": 0.0, |
|
"step": 7870 |
|
}, |
|
{ |
|
"epoch": 98.00506329113924, |
|
"grad_norm": 0.0007605087594129145, |
|
"learning_rate": 2.8129395218002815e-08, |
|
"loss": 0.0, |
|
"step": 7880 |
|
}, |
|
{ |
|
"epoch": 98.00632911392405, |
|
"grad_norm": 0.0008561754948459566, |
|
"learning_rate": 1.4064697609001408e-08, |
|
"loss": 0.0011, |
|
"step": 7890 |
|
}, |
|
{ |
|
"epoch": 98.00759493670886, |
|
"grad_norm": 0.0006674563628621399, |
|
"learning_rate": 0.0, |
|
"loss": 0.0, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 98.00759493670886, |
|
"eval_accuracy": 0.425531914893617, |
|
"eval_loss": 4.9351420402526855, |
|
"eval_runtime": 9.2926, |
|
"eval_samples_per_second": 5.058, |
|
"eval_steps_per_second": 1.291, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 98.00759493670886, |
|
"step": 7900, |
|
"total_flos": 1.378962555602208e+20, |
|
"train_loss": 0.1596812904629944, |
|
"train_runtime": 13780.8931, |
|
"train_samples_per_second": 2.293, |
|
"train_steps_per_second": 0.573 |
|
}, |
|
{ |
|
"epoch": 98.00759493670886, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_loss": 5.42232084274292, |
|
"eval_runtime": 8.1011, |
|
"eval_samples_per_second": 4.814, |
|
"eval_steps_per_second": 1.234, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 98.00759493670886, |
|
"eval_accuracy": 0.3076923076923077, |
|
"eval_loss": 5.422321319580078, |
|
"eval_runtime": 7.0644, |
|
"eval_samples_per_second": 5.521, |
|
"eval_steps_per_second": 1.416, |
|
"step": 7900 |
|
} |
|
], |
|
"logging_steps": 10, |
|
"max_steps": 7900, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 9223372036854775807, |
|
"save_steps": 500, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1.378962555602208e+20, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|