|
{ |
|
"best_metric": null, |
|
"best_model_checkpoint": null, |
|
"epoch": 0.0067933301849093615, |
|
"eval_steps": 1, |
|
"global_step": 187, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 3.6327968903258616e-05, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8828, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 3.6327968903258616e-05, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 507.0426, |
|
"eval_samples_per_second": 66.596, |
|
"eval_steps_per_second": 2.775, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 7.265593780651723e-05, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8984, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 7.265593780651723e-05, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 504.4591, |
|
"eval_samples_per_second": 66.937, |
|
"eval_steps_per_second": 2.789, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.00010898390670977586, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8906, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00010898390670977586, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 506.5358, |
|
"eval_samples_per_second": 66.663, |
|
"eval_steps_per_second": 2.778, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.00014531187561303446, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8828, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.00014531187561303446, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 507.9364, |
|
"eval_samples_per_second": 66.479, |
|
"eval_steps_per_second": 2.77, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.0001816398445162931, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8828, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.0001816398445162931, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 504.3114, |
|
"eval_samples_per_second": 66.957, |
|
"eval_steps_per_second": 2.79, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.00021796781341955172, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8828, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00021796781341955172, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 503.6918, |
|
"eval_samples_per_second": 67.039, |
|
"eval_steps_per_second": 2.793, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.00025429578232281035, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8906, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.00025429578232281035, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 503.9607, |
|
"eval_samples_per_second": 67.003, |
|
"eval_steps_per_second": 2.792, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.0002906237512260689, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8828, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.0002906237512260689, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 505.4484, |
|
"eval_samples_per_second": 66.806, |
|
"eval_steps_per_second": 2.784, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.00032695172012932756, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.875, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.00032695172012932756, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 503.3281, |
|
"eval_samples_per_second": 67.087, |
|
"eval_steps_per_second": 2.795, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.0003632796890325862, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8984, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0003632796890325862, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 502.3143, |
|
"eval_samples_per_second": 67.223, |
|
"eval_steps_per_second": 2.801, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.0003996076579358448, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8828, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.0003996076579358448, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 504.0173, |
|
"eval_samples_per_second": 66.996, |
|
"eval_steps_per_second": 2.792, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.00043593562683910344, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8906, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.00043593562683910344, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 502.8909, |
|
"eval_samples_per_second": 67.146, |
|
"eval_steps_per_second": 2.798, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.000472263595742362, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8828, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.000472263595742362, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 505.6586, |
|
"eval_samples_per_second": 66.778, |
|
"eval_steps_per_second": 2.783, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.0005085915646456207, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8828, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0005085915646456207, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 504.0652, |
|
"eval_samples_per_second": 66.989, |
|
"eval_steps_per_second": 2.791, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.0005449195335488793, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8828, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0005449195335488793, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 502.8518, |
|
"eval_samples_per_second": 67.151, |
|
"eval_steps_per_second": 2.798, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.0005812475024521379, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.8828, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0005812475024521379, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 503.4138, |
|
"eval_samples_per_second": 67.076, |
|
"eval_steps_per_second": 2.795, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.0006175754713553965, |
|
"grad_norm": 0.0, |
|
"learning_rate": 1e-05, |
|
"loss": 10.875, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0006175754713553965, |
|
"eval_accuracy": 5.786882743483815e-05, |
|
"eval_loss": 10.8828125, |
|
"eval_runtime": 504.116, |
|
"eval_samples_per_second": 66.983, |
|
"eval_steps_per_second": 2.791, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.0006539034402586551, |
|
"grad_norm": 9.14120864868164, |
|
"learning_rate": 9.999996367203111e-06, |
|
"loss": 10.8828, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0006539034402586551, |
|
"eval_accuracy": 0.019680148945503458, |
|
"eval_loss": 10.6328125, |
|
"eval_runtime": 503.1889, |
|
"eval_samples_per_second": 67.106, |
|
"eval_steps_per_second": 2.796, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.0006902314091619138, |
|
"grad_norm": 7.371621131896973, |
|
"learning_rate": 9.99999273440622e-06, |
|
"loss": 10.6641, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0006902314091619138, |
|
"eval_accuracy": 0.04440203625321373, |
|
"eval_loss": 10.484375, |
|
"eval_runtime": 502.8755, |
|
"eval_samples_per_second": 67.148, |
|
"eval_steps_per_second": 2.798, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.0007265593780651724, |
|
"grad_norm": 5.806190013885498, |
|
"learning_rate": 9.99998910160933e-06, |
|
"loss": 10.5078, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0007265593780651724, |
|
"eval_accuracy": 0.049879571177803755, |
|
"eval_loss": 10.3828125, |
|
"eval_runtime": 503.8208, |
|
"eval_samples_per_second": 67.022, |
|
"eval_steps_per_second": 2.793, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.0007628873469684309, |
|
"grad_norm": 4.848043441772461, |
|
"learning_rate": 9.99998546881244e-06, |
|
"loss": 10.3984, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0007628873469684309, |
|
"eval_accuracy": 0.05315644057324473, |
|
"eval_loss": 10.3125, |
|
"eval_runtime": 505.0758, |
|
"eval_samples_per_second": 66.855, |
|
"eval_steps_per_second": 2.786, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.0007992153158716896, |
|
"grad_norm": 3.881626844406128, |
|
"learning_rate": 9.99998183601555e-06, |
|
"loss": 10.3438, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0007992153158716896, |
|
"eval_accuracy": 0.05502066212418083, |
|
"eval_loss": 10.25, |
|
"eval_runtime": 508.1599, |
|
"eval_samples_per_second": 66.45, |
|
"eval_steps_per_second": 2.769, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.0008355432847749482, |
|
"grad_norm": 3.3826823234558105, |
|
"learning_rate": 9.999978203218658e-06, |
|
"loss": 10.2656, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0008355432847749482, |
|
"eval_accuracy": 0.05621471691417821, |
|
"eval_loss": 10.203125, |
|
"eval_runtime": 508.8692, |
|
"eval_samples_per_second": 66.357, |
|
"eval_steps_per_second": 2.765, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.0008718712536782069, |
|
"grad_norm": 3.2527709007263184, |
|
"learning_rate": 9.999974570421768e-06, |
|
"loss": 10.25, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0008718712536782069, |
|
"eval_accuracy": 0.05396498301959542, |
|
"eval_loss": 10.1640625, |
|
"eval_runtime": 504.6518, |
|
"eval_samples_per_second": 66.911, |
|
"eval_steps_per_second": 2.788, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.0009081992225814655, |
|
"grad_norm": 3.0049052238464355, |
|
"learning_rate": 9.999970937624878e-06, |
|
"loss": 10.1875, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.0009081992225814655, |
|
"eval_accuracy": 0.04704188536466089, |
|
"eval_loss": 10.1328125, |
|
"eval_runtime": 504.5459, |
|
"eval_samples_per_second": 66.926, |
|
"eval_steps_per_second": 2.789, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.000944527191484724, |
|
"grad_norm": 2.725101947784424, |
|
"learning_rate": 9.999967304827988e-06, |
|
"loss": 10.125, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.000944527191484724, |
|
"eval_accuracy": 0.04612232393221085, |
|
"eval_loss": 10.109375, |
|
"eval_runtime": 505.0881, |
|
"eval_samples_per_second": 66.854, |
|
"eval_steps_per_second": 2.786, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.0009808551603879827, |
|
"grad_norm": 2.5348708629608154, |
|
"learning_rate": 9.999963672031098e-06, |
|
"loss": 10.125, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0009808551603879827, |
|
"eval_accuracy": 0.04800400166270834, |
|
"eval_loss": 10.0859375, |
|
"eval_runtime": 506.8917, |
|
"eval_samples_per_second": 66.616, |
|
"eval_steps_per_second": 2.776, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.0010171831292912414, |
|
"grad_norm": 2.571255683898926, |
|
"learning_rate": 9.999960039234208e-06, |
|
"loss": 10.0938, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0010171831292912414, |
|
"eval_accuracy": 0.04744444860343471, |
|
"eval_loss": 10.0703125, |
|
"eval_runtime": 506.3522, |
|
"eval_samples_per_second": 66.687, |
|
"eval_steps_per_second": 2.779, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.0010535110981944999, |
|
"grad_norm": 2.22050142288208, |
|
"learning_rate": 9.999956406437317e-06, |
|
"loss": 10.0625, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0010535110981944999, |
|
"eval_accuracy": 0.0465065914736666, |
|
"eval_loss": 10.0546875, |
|
"eval_runtime": 504.6118, |
|
"eval_samples_per_second": 66.917, |
|
"eval_steps_per_second": 2.788, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.0010898390670977586, |
|
"grad_norm": 2.1588761806488037, |
|
"learning_rate": 9.999952773640427e-06, |
|
"loss": 10.0703, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0010898390670977586, |
|
"eval_accuracy": 0.04720460706501668, |
|
"eval_loss": 10.0390625, |
|
"eval_runtime": 502.6467, |
|
"eval_samples_per_second": 67.178, |
|
"eval_steps_per_second": 2.799, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.0011261670360010172, |
|
"grad_norm": 2.2763571739196777, |
|
"learning_rate": 9.999949140843537e-06, |
|
"loss": 10.0156, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0011261670360010172, |
|
"eval_accuracy": 0.05153020783188431, |
|
"eval_loss": 10.0234375, |
|
"eval_runtime": 503.4047, |
|
"eval_samples_per_second": 67.077, |
|
"eval_steps_per_second": 2.795, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.0011624950049042757, |
|
"grad_norm": 2.1108500957489014, |
|
"learning_rate": 9.999945508046645e-06, |
|
"loss": 10.0859, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0011624950049042757, |
|
"eval_accuracy": 0.05865791622834431, |
|
"eval_loss": 10.015625, |
|
"eval_runtime": 509.0053, |
|
"eval_samples_per_second": 66.339, |
|
"eval_steps_per_second": 2.764, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.0011988229738075344, |
|
"grad_norm": 2.155444622039795, |
|
"learning_rate": 9.999941875249755e-06, |
|
"loss": 9.9922, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.0011988229738075344, |
|
"eval_accuracy": 0.06134457569194863, |
|
"eval_loss": 10.0078125, |
|
"eval_runtime": 508.1742, |
|
"eval_samples_per_second": 66.448, |
|
"eval_steps_per_second": 2.769, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.001235150942710793, |
|
"grad_norm": 2.6189229488372803, |
|
"learning_rate": 9.999938242452866e-06, |
|
"loss": 10.0234, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.001235150942710793, |
|
"eval_accuracy": 0.06076637954869899, |
|
"eval_loss": 9.9921875, |
|
"eval_runtime": 506.0763, |
|
"eval_samples_per_second": 66.723, |
|
"eval_steps_per_second": 2.78, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.0012714789116140518, |
|
"grad_norm": 2.2233242988586426, |
|
"learning_rate": 9.999934609655974e-06, |
|
"loss": 9.9609, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0012714789116140518, |
|
"eval_accuracy": 0.05996116622448688, |
|
"eval_loss": 9.984375, |
|
"eval_runtime": 505.1839, |
|
"eval_samples_per_second": 66.841, |
|
"eval_steps_per_second": 2.785, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.0013078068805173102, |
|
"grad_norm": 2.289062261581421, |
|
"learning_rate": 9.999930976859084e-06, |
|
"loss": 10.0391, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.0013078068805173102, |
|
"eval_accuracy": 0.06078849649925438, |
|
"eval_loss": 9.9765625, |
|
"eval_runtime": 505.7312, |
|
"eval_samples_per_second": 66.769, |
|
"eval_steps_per_second": 2.782, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.001344134849420569, |
|
"grad_norm": 2.283668041229248, |
|
"learning_rate": 9.999927344062194e-06, |
|
"loss": 9.9922, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.001344134849420569, |
|
"eval_accuracy": 0.061857115756847984, |
|
"eval_loss": 9.9609375, |
|
"eval_runtime": 508.2946, |
|
"eval_samples_per_second": 66.432, |
|
"eval_steps_per_second": 2.768, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.0013804628183238276, |
|
"grad_norm": 2.189363718032837, |
|
"learning_rate": 9.999923711265304e-06, |
|
"loss": 9.9688, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.0013804628183238276, |
|
"eval_accuracy": 0.06226219754889185, |
|
"eval_loss": 9.953125, |
|
"eval_runtime": 508.653, |
|
"eval_samples_per_second": 66.385, |
|
"eval_steps_per_second": 2.766, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.001416790787227086, |
|
"grad_norm": 2.082035541534424, |
|
"learning_rate": 9.999920078468414e-06, |
|
"loss": 9.9453, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.001416790787227086, |
|
"eval_accuracy": 0.06217419292888089, |
|
"eval_loss": 9.9375, |
|
"eval_runtime": 507.0173, |
|
"eval_samples_per_second": 66.599, |
|
"eval_steps_per_second": 2.775, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.0014531187561303447, |
|
"grad_norm": 2.1098358631134033, |
|
"learning_rate": 9.999916445671524e-06, |
|
"loss": 9.9609, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0014531187561303447, |
|
"eval_accuracy": 0.06281911625934278, |
|
"eval_loss": 9.9296875, |
|
"eval_runtime": 505.2162, |
|
"eval_samples_per_second": 66.837, |
|
"eval_steps_per_second": 2.785, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.0014894467250336034, |
|
"grad_norm": 1.99773108959198, |
|
"learning_rate": 9.999912812874633e-06, |
|
"loss": 9.9609, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0014894467250336034, |
|
"eval_accuracy": 0.06401496588040619, |
|
"eval_loss": 9.9140625, |
|
"eval_runtime": 505.419, |
|
"eval_samples_per_second": 66.81, |
|
"eval_steps_per_second": 2.784, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.0015257746939368619, |
|
"grad_norm": 2.0331761837005615, |
|
"learning_rate": 9.999909180077743e-06, |
|
"loss": 10.0234, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0015257746939368619, |
|
"eval_accuracy": 0.06490028077816117, |
|
"eval_loss": 9.8984375, |
|
"eval_runtime": 508.1771, |
|
"eval_samples_per_second": 66.447, |
|
"eval_steps_per_second": 2.769, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.0015621026628401206, |
|
"grad_norm": 2.115607500076294, |
|
"learning_rate": 9.999905547280853e-06, |
|
"loss": 9.9375, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0015621026628401206, |
|
"eval_accuracy": 0.06480990234932096, |
|
"eval_loss": 9.890625, |
|
"eval_runtime": 509.849, |
|
"eval_samples_per_second": 66.229, |
|
"eval_steps_per_second": 2.76, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.0015984306317433793, |
|
"grad_norm": 2.163503646850586, |
|
"learning_rate": 9.999901914483961e-06, |
|
"loss": 9.8516, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.0015984306317433793, |
|
"eval_accuracy": 0.06442907972555643, |
|
"eval_loss": 9.875, |
|
"eval_runtime": 509.7023, |
|
"eval_samples_per_second": 66.248, |
|
"eval_steps_per_second": 2.76, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.001634758600646638, |
|
"grad_norm": 2.0586893558502197, |
|
"learning_rate": 9.999898281687071e-06, |
|
"loss": 9.8672, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.001634758600646638, |
|
"eval_accuracy": 0.06429814390440197, |
|
"eval_loss": 9.859375, |
|
"eval_runtime": 506.8968, |
|
"eval_samples_per_second": 66.615, |
|
"eval_steps_per_second": 2.776, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.0016710865695498964, |
|
"grad_norm": 1.898136854171753, |
|
"learning_rate": 9.999894648890181e-06, |
|
"loss": 9.8984, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0016710865695498964, |
|
"eval_accuracy": 0.06426870288514172, |
|
"eval_loss": 9.84375, |
|
"eval_runtime": 505.4907, |
|
"eval_samples_per_second": 66.8, |
|
"eval_steps_per_second": 2.783, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.001707414538453155, |
|
"grad_norm": 1.9898130893707275, |
|
"learning_rate": 9.99989101609329e-06, |
|
"loss": 9.875, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.001707414538453155, |
|
"eval_accuracy": 0.06445736278929022, |
|
"eval_loss": 9.8359375, |
|
"eval_runtime": 505.8927, |
|
"eval_samples_per_second": 66.747, |
|
"eval_steps_per_second": 2.781, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.0017437425073564138, |
|
"grad_norm": 1.9185965061187744, |
|
"learning_rate": 9.9998873832964e-06, |
|
"loss": 9.8672, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0017437425073564138, |
|
"eval_accuracy": 0.06462755330279167, |
|
"eval_loss": 9.8203125, |
|
"eval_runtime": 507.6208, |
|
"eval_samples_per_second": 66.52, |
|
"eval_steps_per_second": 2.772, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.0017800704762596722, |
|
"grad_norm": 1.8137012720108032, |
|
"learning_rate": 9.99988375049951e-06, |
|
"loss": 9.8984, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.0017800704762596722, |
|
"eval_accuracy": 0.06494483311704172, |
|
"eval_loss": 9.8125, |
|
"eval_runtime": 506.9398, |
|
"eval_samples_per_second": 66.609, |
|
"eval_steps_per_second": 2.775, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.001816398445162931, |
|
"grad_norm": 2.237258195877075, |
|
"learning_rate": 9.99988011770262e-06, |
|
"loss": 9.7891, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.001816398445162931, |
|
"eval_accuracy": 0.06534296717592682, |
|
"eval_loss": 9.8046875, |
|
"eval_runtime": 504.003, |
|
"eval_samples_per_second": 66.998, |
|
"eval_steps_per_second": 2.792, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.0018527264140661896, |
|
"grad_norm": 2.279499053955078, |
|
"learning_rate": 9.99987648490573e-06, |
|
"loss": 9.8281, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.0018527264140661896, |
|
"eval_accuracy": 0.06550117284972942, |
|
"eval_loss": 9.7890625, |
|
"eval_runtime": 503.0349, |
|
"eval_samples_per_second": 67.127, |
|
"eval_steps_per_second": 2.797, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.001889054382969448, |
|
"grad_norm": 1.8227834701538086, |
|
"learning_rate": 9.99987285210884e-06, |
|
"loss": 9.8281, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.001889054382969448, |
|
"eval_accuracy": 0.0654096075164746, |
|
"eval_loss": 9.78125, |
|
"eval_runtime": 505.4701, |
|
"eval_samples_per_second": 66.803, |
|
"eval_steps_per_second": 2.784, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.0019253823518727068, |
|
"grad_norm": 2.207292079925537, |
|
"learning_rate": 9.99986921931195e-06, |
|
"loss": 9.7969, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0019253823518727068, |
|
"eval_accuracy": 0.06596467349808319, |
|
"eval_loss": 9.7734375, |
|
"eval_runtime": 506.8878, |
|
"eval_samples_per_second": 66.616, |
|
"eval_steps_per_second": 2.776, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.0019617103207759654, |
|
"grad_norm": 1.9478576183319092, |
|
"learning_rate": 9.999865586515059e-06, |
|
"loss": 9.7812, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.0019617103207759654, |
|
"eval_accuracy": 0.06697281852830743, |
|
"eval_loss": 9.765625, |
|
"eval_runtime": 506.2987, |
|
"eval_samples_per_second": 66.694, |
|
"eval_steps_per_second": 2.779, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.001998038289679224, |
|
"grad_norm": 2.4436683654785156, |
|
"learning_rate": 9.999861953718169e-06, |
|
"loss": 9.8047, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.001998038289679224, |
|
"eval_accuracy": 0.06823391894328684, |
|
"eval_loss": 9.75, |
|
"eval_runtime": 505.1115, |
|
"eval_samples_per_second": 66.851, |
|
"eval_steps_per_second": 2.786, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.002034366258582483, |
|
"grad_norm": 1.911065936088562, |
|
"learning_rate": 9.999858320921279e-06, |
|
"loss": 9.7969, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.002034366258582483, |
|
"eval_accuracy": 0.0688041251934039, |
|
"eval_loss": 9.7421875, |
|
"eval_runtime": 503.4384, |
|
"eval_samples_per_second": 67.073, |
|
"eval_steps_per_second": 2.795, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.0020706942274857413, |
|
"grad_norm": 2.6223678588867188, |
|
"learning_rate": 9.999854688124387e-06, |
|
"loss": 9.7891, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0020706942274857413, |
|
"eval_accuracy": 0.0691195522788116, |
|
"eval_loss": 9.734375, |
|
"eval_runtime": 504.3798, |
|
"eval_samples_per_second": 66.948, |
|
"eval_steps_per_second": 2.79, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0021070221963889997, |
|
"grad_norm": 2.182452917098999, |
|
"learning_rate": 9.999851055327497e-06, |
|
"loss": 9.6875, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0021070221963889997, |
|
"eval_accuracy": 0.0690351083720445, |
|
"eval_loss": 9.7265625, |
|
"eval_runtime": 507.3727, |
|
"eval_samples_per_second": 66.553, |
|
"eval_steps_per_second": 2.773, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.0021433501652922586, |
|
"grad_norm": 1.8129832744598389, |
|
"learning_rate": 9.999847422530608e-06, |
|
"loss": 9.7188, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.0021433501652922586, |
|
"eval_accuracy": 0.06855982552620901, |
|
"eval_loss": 9.71875, |
|
"eval_runtime": 504.2068, |
|
"eval_samples_per_second": 66.971, |
|
"eval_steps_per_second": 2.791, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.002179678134195517, |
|
"grad_norm": 1.9449687004089355, |
|
"learning_rate": 9.999843789733716e-06, |
|
"loss": 9.7344, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.002179678134195517, |
|
"eval_accuracy": 0.06816276257618588, |
|
"eval_loss": 9.7109375, |
|
"eval_runtime": 506.178, |
|
"eval_samples_per_second": 66.71, |
|
"eval_steps_per_second": 2.78, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.0022160061030987756, |
|
"grad_norm": 2.0629429817199707, |
|
"learning_rate": 9.999840156936826e-06, |
|
"loss": 9.7344, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0022160061030987756, |
|
"eval_accuracy": 0.06871380466234002, |
|
"eval_loss": 9.6953125, |
|
"eval_runtime": 504.7991, |
|
"eval_samples_per_second": 66.892, |
|
"eval_steps_per_second": 2.787, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.0022523340720020345, |
|
"grad_norm": 1.7803966999053955, |
|
"learning_rate": 9.999836524139936e-06, |
|
"loss": 9.7578, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0022523340720020345, |
|
"eval_accuracy": 0.06968408454684902, |
|
"eval_loss": 9.6875, |
|
"eval_runtime": 503.513, |
|
"eval_samples_per_second": 67.063, |
|
"eval_steps_per_second": 2.794, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.002288662040905293, |
|
"grad_norm": 2.0867655277252197, |
|
"learning_rate": 9.999832891343046e-06, |
|
"loss": 9.6484, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.002288662040905293, |
|
"eval_accuracy": 0.07077357595280706, |
|
"eval_loss": 9.671875, |
|
"eval_runtime": 505.2322, |
|
"eval_samples_per_second": 66.835, |
|
"eval_steps_per_second": 2.785, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.0023249900098085514, |
|
"grad_norm": 2.1032333374023438, |
|
"learning_rate": 9.999829258546155e-06, |
|
"loss": 9.6328, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0023249900098085514, |
|
"eval_accuracy": 0.07146232789994546, |
|
"eval_loss": 9.6640625, |
|
"eval_runtime": 503.6458, |
|
"eval_samples_per_second": 67.045, |
|
"eval_steps_per_second": 2.794, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.0023613179787118103, |
|
"grad_norm": 1.6707836389541626, |
|
"learning_rate": 9.999825625749265e-06, |
|
"loss": 9.7656, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0023613179787118103, |
|
"eval_accuracy": 0.07207561009564684, |
|
"eval_loss": 9.65625, |
|
"eval_runtime": 503.2725, |
|
"eval_samples_per_second": 67.095, |
|
"eval_steps_per_second": 2.796, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.0023976459476150688, |
|
"grad_norm": 1.7805981636047363, |
|
"learning_rate": 9.999821992952375e-06, |
|
"loss": 9.6875, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0023976459476150688, |
|
"eval_accuracy": 0.0724615277237278, |
|
"eval_loss": 9.6484375, |
|
"eval_runtime": 502.4455, |
|
"eval_samples_per_second": 67.205, |
|
"eval_steps_per_second": 2.8, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.0024339739165183277, |
|
"grad_norm": 2.426743745803833, |
|
"learning_rate": 9.999818360155485e-06, |
|
"loss": 9.6328, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0024339739165183277, |
|
"eval_accuracy": 0.07273411045465647, |
|
"eval_loss": 9.640625, |
|
"eval_runtime": 502.4056, |
|
"eval_samples_per_second": 67.211, |
|
"eval_steps_per_second": 2.801, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.002470301885421586, |
|
"grad_norm": 1.7032270431518555, |
|
"learning_rate": 9.999814727358595e-06, |
|
"loss": 9.6953, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.002470301885421586, |
|
"eval_accuracy": 0.07335859587007634, |
|
"eval_loss": 9.6328125, |
|
"eval_runtime": 505.6904, |
|
"eval_samples_per_second": 66.774, |
|
"eval_steps_per_second": 2.782, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.0025066298543248446, |
|
"grad_norm": 1.7397522926330566, |
|
"learning_rate": 9.999811094561705e-06, |
|
"loss": 9.7188, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0025066298543248446, |
|
"eval_accuracy": 0.07437455710010418, |
|
"eval_loss": 9.625, |
|
"eval_runtime": 505.9561, |
|
"eval_samples_per_second": 66.739, |
|
"eval_steps_per_second": 2.781, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.0025429578232281035, |
|
"grad_norm": 1.6626527309417725, |
|
"learning_rate": 9.999807461764813e-06, |
|
"loss": 9.6875, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.0025429578232281035, |
|
"eval_accuracy": 0.075310387807701, |
|
"eval_loss": 9.6171875, |
|
"eval_runtime": 505.7964, |
|
"eval_samples_per_second": 66.76, |
|
"eval_steps_per_second": 2.782, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.002579285792131362, |
|
"grad_norm": 2.158742666244507, |
|
"learning_rate": 9.999803828967923e-06, |
|
"loss": 9.625, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.002579285792131362, |
|
"eval_accuracy": 0.07634256041509926, |
|
"eval_loss": 9.609375, |
|
"eval_runtime": 504.0869, |
|
"eval_samples_per_second": 66.986, |
|
"eval_steps_per_second": 2.791, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.0026156137610346204, |
|
"grad_norm": 1.8259432315826416, |
|
"learning_rate": 9.999800196171034e-06, |
|
"loss": 9.6172, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0026156137610346204, |
|
"eval_accuracy": 0.07690020284775424, |
|
"eval_loss": 9.6015625, |
|
"eval_runtime": 504.7109, |
|
"eval_samples_per_second": 66.904, |
|
"eval_steps_per_second": 2.788, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.0026519417299378793, |
|
"grad_norm": 1.7585749626159668, |
|
"learning_rate": 9.999796563374142e-06, |
|
"loss": 9.6016, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.0026519417299378793, |
|
"eval_accuracy": 0.07706367721920222, |
|
"eval_loss": 9.59375, |
|
"eval_runtime": 505.9779, |
|
"eval_samples_per_second": 66.736, |
|
"eval_steps_per_second": 2.781, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.002688269698841138, |
|
"grad_norm": 1.8018831014633179, |
|
"learning_rate": 9.999792930577252e-06, |
|
"loss": 9.6094, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.002688269698841138, |
|
"eval_accuracy": 0.07714539993048214, |
|
"eval_loss": 9.5859375, |
|
"eval_runtime": 505.2425, |
|
"eval_samples_per_second": 66.833, |
|
"eval_steps_per_second": 2.785, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.0027245976677443963, |
|
"grad_norm": 1.7233467102050781, |
|
"learning_rate": 9.999789297780362e-06, |
|
"loss": 9.5859, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.0027245976677443963, |
|
"eval_accuracy": 0.07710901117806314, |
|
"eval_loss": 9.578125, |
|
"eval_runtime": 505.5998, |
|
"eval_samples_per_second": 66.786, |
|
"eval_steps_per_second": 2.783, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.002760925636647655, |
|
"grad_norm": 1.9081038236618042, |
|
"learning_rate": 9.99978566498347e-06, |
|
"loss": 9.5859, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.002760925636647655, |
|
"eval_accuracy": 0.07673672847630625, |
|
"eval_loss": 9.5703125, |
|
"eval_runtime": 503.3457, |
|
"eval_samples_per_second": 67.085, |
|
"eval_steps_per_second": 2.795, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.0027972536055509136, |
|
"grad_norm": 1.7332507371902466, |
|
"learning_rate": 9.99978203218658e-06, |
|
"loss": 9.5859, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.0027972536055509136, |
|
"eval_accuracy": 0.07650699009985658, |
|
"eval_loss": 9.5625, |
|
"eval_runtime": 502.974, |
|
"eval_samples_per_second": 67.135, |
|
"eval_steps_per_second": 2.797, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.002833581574454172, |
|
"grad_norm": 1.8088840246200562, |
|
"learning_rate": 9.99977839938969e-06, |
|
"loss": 9.5781, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.002833581574454172, |
|
"eval_accuracy": 0.07642793068628753, |
|
"eval_loss": 9.5546875, |
|
"eval_runtime": 505.8316, |
|
"eval_samples_per_second": 66.755, |
|
"eval_steps_per_second": 2.782, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.002869909543357431, |
|
"grad_norm": 1.6268699169158936, |
|
"learning_rate": 9.9997747665928e-06, |
|
"loss": 9.6172, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.002869909543357431, |
|
"eval_accuracy": 0.07634560004835622, |
|
"eval_loss": 9.546875, |
|
"eval_runtime": 505.9115, |
|
"eval_samples_per_second": 66.745, |
|
"eval_steps_per_second": 2.781, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.0029062375122606895, |
|
"grad_norm": 1.8675007820129395, |
|
"learning_rate": 9.999771133795911e-06, |
|
"loss": 9.5859, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.0029062375122606895, |
|
"eval_accuracy": 0.07677149609098821, |
|
"eval_loss": 9.5390625, |
|
"eval_runtime": 505.7348, |
|
"eval_samples_per_second": 66.768, |
|
"eval_steps_per_second": 2.782, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.002942565481163948, |
|
"grad_norm": 1.9098700284957886, |
|
"learning_rate": 9.999767500999021e-06, |
|
"loss": 9.5859, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.002942565481163948, |
|
"eval_accuracy": 0.07698542837450169, |
|
"eval_loss": 9.53125, |
|
"eval_runtime": 502.794, |
|
"eval_samples_per_second": 67.159, |
|
"eval_steps_per_second": 2.798, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.002978893450067207, |
|
"grad_norm": 1.8791757822036743, |
|
"learning_rate": 9.99976386820213e-06, |
|
"loss": 9.5391, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.002978893450067207, |
|
"eval_accuracy": 0.07695144237980009, |
|
"eval_loss": 9.5234375, |
|
"eval_runtime": 503.0464, |
|
"eval_samples_per_second": 67.125, |
|
"eval_steps_per_second": 2.797, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.0030152214189704653, |
|
"grad_norm": 1.704999566078186, |
|
"learning_rate": 9.99976023540524e-06, |
|
"loss": 9.5391, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.0030152214189704653, |
|
"eval_accuracy": 0.0764418551014932, |
|
"eval_loss": 9.5234375, |
|
"eval_runtime": 505.8882, |
|
"eval_samples_per_second": 66.748, |
|
"eval_steps_per_second": 2.781, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.0030515493878737238, |
|
"grad_norm": 1.6351501941680908, |
|
"learning_rate": 9.99975660260835e-06, |
|
"loss": 9.5312, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.0030515493878737238, |
|
"eval_accuracy": 0.07578613383574707, |
|
"eval_loss": 9.515625, |
|
"eval_runtime": 506.1511, |
|
"eval_samples_per_second": 66.713, |
|
"eval_steps_per_second": 2.78, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.0030878773567769827, |
|
"grad_norm": 1.8447389602661133, |
|
"learning_rate": 9.999752969811458e-06, |
|
"loss": 9.5547, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.0030878773567769827, |
|
"eval_accuracy": 0.07570536643777649, |
|
"eval_loss": 9.5078125, |
|
"eval_runtime": 504.3987, |
|
"eval_samples_per_second": 66.945, |
|
"eval_steps_per_second": 2.789, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.003124205325680241, |
|
"grad_norm": 1.743525743484497, |
|
"learning_rate": 9.999749337014568e-06, |
|
"loss": 9.5781, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.003124205325680241, |
|
"eval_accuracy": 0.07604039192046953, |
|
"eval_loss": 9.5, |
|
"eval_runtime": 504.8644, |
|
"eval_samples_per_second": 66.883, |
|
"eval_steps_per_second": 2.787, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.0031605332945835, |
|
"grad_norm": 1.7386131286621094, |
|
"learning_rate": 9.999745704217678e-06, |
|
"loss": 9.5703, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0031605332945835, |
|
"eval_accuracy": 0.0763665879922733, |
|
"eval_loss": 9.4921875, |
|
"eval_runtime": 503.9366, |
|
"eval_samples_per_second": 67.006, |
|
"eval_steps_per_second": 2.792, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.0031968612634867585, |
|
"grad_norm": 1.8193249702453613, |
|
"learning_rate": 9.999742071420788e-06, |
|
"loss": 9.4844, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.0031968612634867585, |
|
"eval_accuracy": 0.0764146431466214, |
|
"eval_loss": 9.484375, |
|
"eval_runtime": 504.69, |
|
"eval_samples_per_second": 66.906, |
|
"eval_steps_per_second": 2.788, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.003233189232390017, |
|
"grad_norm": 1.7229965925216675, |
|
"learning_rate": 9.999738438623897e-06, |
|
"loss": 9.5312, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.003233189232390017, |
|
"eval_accuracy": 0.07648345465378129, |
|
"eval_loss": 9.4765625, |
|
"eval_runtime": 504.1538, |
|
"eval_samples_per_second": 66.978, |
|
"eval_steps_per_second": 2.791, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.003269517201293276, |
|
"grad_norm": 1.7293239831924438, |
|
"learning_rate": 9.999734805827007e-06, |
|
"loss": 9.5312, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.003269517201293276, |
|
"eval_accuracy": 0.07651958286620683, |
|
"eval_loss": 9.46875, |
|
"eval_runtime": 504.9175, |
|
"eval_samples_per_second": 66.876, |
|
"eval_steps_per_second": 2.787, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.0033058451701965343, |
|
"grad_norm": 1.6052281856536865, |
|
"learning_rate": 9.999731173030117e-06, |
|
"loss": 9.5078, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.0033058451701965343, |
|
"eval_accuracy": 0.07662689639462152, |
|
"eval_loss": 9.46875, |
|
"eval_runtime": 505.1868, |
|
"eval_samples_per_second": 66.841, |
|
"eval_steps_per_second": 2.785, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.003342173139099793, |
|
"grad_norm": 1.6207119226455688, |
|
"learning_rate": 9.999727540233227e-06, |
|
"loss": 9.5, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.003342173139099793, |
|
"eval_accuracy": 0.07678099132630518, |
|
"eval_loss": 9.4609375, |
|
"eval_runtime": 503.2555, |
|
"eval_samples_per_second": 67.097, |
|
"eval_steps_per_second": 2.796, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.0033785011080030517, |
|
"grad_norm": 1.7734899520874023, |
|
"learning_rate": 9.999723907436337e-06, |
|
"loss": 9.4844, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.0033785011080030517, |
|
"eval_accuracy": 0.07692313036717814, |
|
"eval_loss": 9.453125, |
|
"eval_runtime": 505.6253, |
|
"eval_samples_per_second": 66.783, |
|
"eval_steps_per_second": 2.783, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.00341482907690631, |
|
"grad_norm": 1.8365049362182617, |
|
"learning_rate": 9.999720274639447e-06, |
|
"loss": 9.4688, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.00341482907690631, |
|
"eval_accuracy": 0.0772882916424473, |
|
"eval_loss": 9.4453125, |
|
"eval_runtime": 504.8802, |
|
"eval_samples_per_second": 66.881, |
|
"eval_steps_per_second": 2.787, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.0034511570458095686, |
|
"grad_norm": 1.5928810834884644, |
|
"learning_rate": 9.999716641842555e-06, |
|
"loss": 9.5156, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0034511570458095686, |
|
"eval_accuracy": 0.07773769418226642, |
|
"eval_loss": 9.4375, |
|
"eval_runtime": 508.082, |
|
"eval_samples_per_second": 66.46, |
|
"eval_steps_per_second": 2.769, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.0034874850147128275, |
|
"grad_norm": 1.648245096206665, |
|
"learning_rate": 9.999713009045665e-06, |
|
"loss": 9.4453, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.0034874850147128275, |
|
"eval_accuracy": 0.07832689090301743, |
|
"eval_loss": 9.4296875, |
|
"eval_runtime": 504.0456, |
|
"eval_samples_per_second": 66.992, |
|
"eval_steps_per_second": 2.791, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.003523812983616086, |
|
"grad_norm": 1.7062604427337646, |
|
"learning_rate": 9.999709376248776e-06, |
|
"loss": 9.4766, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.003523812983616086, |
|
"eval_accuracy": 0.07944866031927555, |
|
"eval_loss": 9.421875, |
|
"eval_runtime": 504.2362, |
|
"eval_samples_per_second": 66.967, |
|
"eval_steps_per_second": 2.79, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.0035601409525193445, |
|
"grad_norm": 1.7384364604949951, |
|
"learning_rate": 9.999705743451884e-06, |
|
"loss": 9.4219, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.0035601409525193445, |
|
"eval_accuracy": 0.08043025921905568, |
|
"eval_loss": 9.421875, |
|
"eval_runtime": 505.9376, |
|
"eval_samples_per_second": 66.741, |
|
"eval_steps_per_second": 2.781, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.0035964689214226034, |
|
"grad_norm": 1.6508241891860962, |
|
"learning_rate": 9.999702110654994e-06, |
|
"loss": 9.4531, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.0035964689214226034, |
|
"eval_accuracy": 0.08138432772619424, |
|
"eval_loss": 9.4140625, |
|
"eval_runtime": 506.1211, |
|
"eval_samples_per_second": 66.717, |
|
"eval_steps_per_second": 2.78, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.003632796890325862, |
|
"grad_norm": 1.690098762512207, |
|
"learning_rate": 9.999698477858104e-06, |
|
"loss": 9.4141, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.003632796890325862, |
|
"eval_accuracy": 0.08187622723383445, |
|
"eval_loss": 9.40625, |
|
"eval_runtime": 505.2427, |
|
"eval_samples_per_second": 66.833, |
|
"eval_steps_per_second": 2.785, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.0036691248592291203, |
|
"grad_norm": 1.738881230354309, |
|
"learning_rate": 9.999694845061213e-06, |
|
"loss": 9.375, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.0036691248592291203, |
|
"eval_accuracy": 0.08247908782979768, |
|
"eval_loss": 9.3984375, |
|
"eval_runtime": 504.9005, |
|
"eval_samples_per_second": 66.879, |
|
"eval_steps_per_second": 2.787, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.003705452828132379, |
|
"grad_norm": 1.5808320045471191, |
|
"learning_rate": 9.999691212264323e-06, |
|
"loss": 9.4219, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.003705452828132379, |
|
"eval_accuracy": 0.08282250849005755, |
|
"eval_loss": 9.390625, |
|
"eval_runtime": 502.7993, |
|
"eval_samples_per_second": 67.158, |
|
"eval_steps_per_second": 2.798, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.0037417807970356377, |
|
"grad_norm": 1.6801838874816895, |
|
"learning_rate": 9.999687579467433e-06, |
|
"loss": 9.3828, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.0037417807970356377, |
|
"eval_accuracy": 0.08283837248077006, |
|
"eval_loss": 9.3828125, |
|
"eval_runtime": 505.0748, |
|
"eval_samples_per_second": 66.855, |
|
"eval_steps_per_second": 2.786, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.003778108765938896, |
|
"grad_norm": 1.6030634641647339, |
|
"learning_rate": 9.999683946670543e-06, |
|
"loss": 9.375, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.003778108765938896, |
|
"eval_accuracy": 0.08272639818136136, |
|
"eval_loss": 9.3828125, |
|
"eval_runtime": 505.0381, |
|
"eval_samples_per_second": 66.86, |
|
"eval_steps_per_second": 2.786, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.003814436734842155, |
|
"grad_norm": 2.755161762237549, |
|
"learning_rate": 9.999680313873651e-06, |
|
"loss": 9.3516, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.003814436734842155, |
|
"eval_accuracy": 0.08249729668045126, |
|
"eval_loss": 9.375, |
|
"eval_runtime": 506.1198, |
|
"eval_samples_per_second": 66.717, |
|
"eval_steps_per_second": 2.78, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.0038507647037454135, |
|
"grad_norm": 1.700968861579895, |
|
"learning_rate": 9.999676681076761e-06, |
|
"loss": 9.3906, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.0038507647037454135, |
|
"eval_accuracy": 0.0824550023548473, |
|
"eval_loss": 9.3671875, |
|
"eval_runtime": 504.8911, |
|
"eval_samples_per_second": 66.88, |
|
"eval_steps_per_second": 2.787, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.0038870926726486724, |
|
"grad_norm": 1.7681119441986084, |
|
"learning_rate": 9.999673048279871e-06, |
|
"loss": 9.3672, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.0038870926726486724, |
|
"eval_accuracy": 0.08233683299337206, |
|
"eval_loss": 9.359375, |
|
"eval_runtime": 503.3036, |
|
"eval_samples_per_second": 67.091, |
|
"eval_steps_per_second": 2.796, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.003923420641551931, |
|
"grad_norm": 1.7191691398620605, |
|
"learning_rate": 9.999669415482981e-06, |
|
"loss": 9.3359, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.003923420641551931, |
|
"eval_accuracy": 0.08216516608657437, |
|
"eval_loss": 9.3515625, |
|
"eval_runtime": 504.9663, |
|
"eval_samples_per_second": 66.87, |
|
"eval_steps_per_second": 2.786, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.00395974861045519, |
|
"grad_norm": 1.5659282207489014, |
|
"learning_rate": 9.999665782686092e-06, |
|
"loss": 9.4062, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.00395974861045519, |
|
"eval_accuracy": 0.08175872369678691, |
|
"eval_loss": 9.34375, |
|
"eval_runtime": 505.2603, |
|
"eval_samples_per_second": 66.831, |
|
"eval_steps_per_second": 2.785, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.003996076579358448, |
|
"grad_norm": 1.8398549556732178, |
|
"learning_rate": 9.999662149889202e-06, |
|
"loss": 9.3906, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.003996076579358448, |
|
"eval_accuracy": 0.08157759050356041, |
|
"eval_loss": 9.34375, |
|
"eval_runtime": 506.1037, |
|
"eval_samples_per_second": 66.72, |
|
"eval_steps_per_second": 2.78, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.004032404548261707, |
|
"grad_norm": 2.4529032707214355, |
|
"learning_rate": 9.99965851709231e-06, |
|
"loss": 9.25, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.004032404548261707, |
|
"eval_accuracy": 0.08160373134957025, |
|
"eval_loss": 9.3359375, |
|
"eval_runtime": 504.6658, |
|
"eval_samples_per_second": 66.91, |
|
"eval_steps_per_second": 2.788, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.004068732517164966, |
|
"grad_norm": 1.7243255376815796, |
|
"learning_rate": 9.99965488429542e-06, |
|
"loss": 9.3281, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.004068732517164966, |
|
"eval_accuracy": 0.08156398452612451, |
|
"eval_loss": 9.328125, |
|
"eval_runtime": 501.5913, |
|
"eval_samples_per_second": 67.32, |
|
"eval_steps_per_second": 2.805, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.004105060486068224, |
|
"grad_norm": 1.617632508277893, |
|
"learning_rate": 9.99965125149853e-06, |
|
"loss": 9.375, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.004105060486068224, |
|
"eval_accuracy": 0.08125842901157987, |
|
"eval_loss": 9.3203125, |
|
"eval_runtime": 504.3056, |
|
"eval_samples_per_second": 66.957, |
|
"eval_steps_per_second": 2.79, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.0041413884549714826, |
|
"grad_norm": 1.7473822832107544, |
|
"learning_rate": 9.999647618701639e-06, |
|
"loss": 9.3906, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.0041413884549714826, |
|
"eval_accuracy": 0.0812115607616464, |
|
"eval_loss": 9.3203125, |
|
"eval_runtime": 507.2626, |
|
"eval_samples_per_second": 66.567, |
|
"eval_steps_per_second": 2.774, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.0041777164238747415, |
|
"grad_norm": 1.7480499744415283, |
|
"learning_rate": 9.999643985904749e-06, |
|
"loss": 9.3203, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.0041777164238747415, |
|
"eval_accuracy": 0.08121138706831743, |
|
"eval_loss": 9.3125, |
|
"eval_runtime": 505.5579, |
|
"eval_samples_per_second": 66.792, |
|
"eval_steps_per_second": 2.783, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.0042140443927779995, |
|
"grad_norm": 1.645756721496582, |
|
"learning_rate": 9.999640353107859e-06, |
|
"loss": 9.3125, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.0042140443927779995, |
|
"eval_accuracy": 0.08107075336962886, |
|
"eval_loss": 9.3046875, |
|
"eval_runtime": 504.2295, |
|
"eval_samples_per_second": 66.968, |
|
"eval_steps_per_second": 2.79, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.004250372361681258, |
|
"grad_norm": 1.6185376644134521, |
|
"learning_rate": 9.999636720310967e-06, |
|
"loss": 9.3359, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.004250372361681258, |
|
"eval_accuracy": 0.08092007440674827, |
|
"eval_loss": 9.296875, |
|
"eval_runtime": 502.016, |
|
"eval_samples_per_second": 67.263, |
|
"eval_steps_per_second": 2.803, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.004286700330584517, |
|
"grad_norm": 1.718162178993225, |
|
"learning_rate": 9.999633087514077e-06, |
|
"loss": 9.2812, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.004286700330584517, |
|
"eval_accuracy": 0.08078224875021137, |
|
"eval_loss": 9.296875, |
|
"eval_runtime": 503.8149, |
|
"eval_samples_per_second": 67.023, |
|
"eval_steps_per_second": 2.793, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.004323028299487775, |
|
"grad_norm": 2.7064478397369385, |
|
"learning_rate": 9.999629454717187e-06, |
|
"loss": 9.2031, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.004323028299487775, |
|
"eval_accuracy": 0.08067389306182286, |
|
"eval_loss": 9.2890625, |
|
"eval_runtime": 506.662, |
|
"eval_samples_per_second": 66.646, |
|
"eval_steps_per_second": 2.777, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.004359356268391034, |
|
"grad_norm": 1.9571579694747925, |
|
"learning_rate": 9.999625821920297e-06, |
|
"loss": 9.2422, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.004359356268391034, |
|
"eval_accuracy": 0.08080248402303625, |
|
"eval_loss": 9.28125, |
|
"eval_runtime": 506.1232, |
|
"eval_samples_per_second": 66.717, |
|
"eval_steps_per_second": 2.78, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.004395684237294293, |
|
"grad_norm": 1.6557737588882446, |
|
"learning_rate": 9.999622189123407e-06, |
|
"loss": 9.3047, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.004395684237294293, |
|
"eval_accuracy": 0.08088458306986226, |
|
"eval_loss": 9.28125, |
|
"eval_runtime": 503.855, |
|
"eval_samples_per_second": 67.017, |
|
"eval_steps_per_second": 2.792, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.004432012206197551, |
|
"grad_norm": 1.657261848449707, |
|
"learning_rate": 9.999618556326518e-06, |
|
"loss": 9.2969, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.004432012206197551, |
|
"eval_accuracy": 0.08103369879278215, |
|
"eval_loss": 9.2734375, |
|
"eval_runtime": 503.3156, |
|
"eval_samples_per_second": 67.089, |
|
"eval_steps_per_second": 2.795, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.00446834017510081, |
|
"grad_norm": 1.9978619813919067, |
|
"learning_rate": 9.999614923529626e-06, |
|
"loss": 9.25, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.00446834017510081, |
|
"eval_accuracy": 0.08151164493632851, |
|
"eval_loss": 9.265625, |
|
"eval_runtime": 503.1303, |
|
"eval_samples_per_second": 67.114, |
|
"eval_steps_per_second": 2.796, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.004504668144004069, |
|
"grad_norm": 1.604614019393921, |
|
"learning_rate": 9.999611290732736e-06, |
|
"loss": 9.3281, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.004504668144004069, |
|
"eval_accuracy": 0.0825292273040934, |
|
"eval_loss": 9.2578125, |
|
"eval_runtime": 505.0968, |
|
"eval_samples_per_second": 66.853, |
|
"eval_steps_per_second": 2.786, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.004540996112907327, |
|
"grad_norm": 1.707974910736084, |
|
"learning_rate": 9.999607657935846e-06, |
|
"loss": 9.2656, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.004540996112907327, |
|
"eval_accuracy": 0.08359170939739675, |
|
"eval_loss": 9.2578125, |
|
"eval_runtime": 506.8511, |
|
"eval_samples_per_second": 66.621, |
|
"eval_steps_per_second": 2.776, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.004577324081810586, |
|
"grad_norm": 1.7586125135421753, |
|
"learning_rate": 9.999604025138955e-06, |
|
"loss": 9.3047, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.004577324081810586, |
|
"eval_accuracy": 0.08447563474851999, |
|
"eval_loss": 9.25, |
|
"eval_runtime": 504.2303, |
|
"eval_samples_per_second": 66.967, |
|
"eval_steps_per_second": 2.79, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.004613652050713845, |
|
"grad_norm": 1.7619037628173828, |
|
"learning_rate": 9.999600392342065e-06, |
|
"loss": 9.25, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.004613652050713845, |
|
"eval_accuracy": 0.08498837745563648, |
|
"eval_loss": 9.2421875, |
|
"eval_runtime": 502.809, |
|
"eval_samples_per_second": 67.157, |
|
"eval_steps_per_second": 2.798, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.004649980019617103, |
|
"grad_norm": 1.6512857675552368, |
|
"learning_rate": 9.999596759545175e-06, |
|
"loss": 9.2969, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.004649980019617103, |
|
"eval_accuracy": 0.08521718946766497, |
|
"eval_loss": 9.234375, |
|
"eval_runtime": 504.7926, |
|
"eval_samples_per_second": 66.893, |
|
"eval_steps_per_second": 2.787, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.004686307988520362, |
|
"grad_norm": 1.7101061344146729, |
|
"learning_rate": 9.999593126748285e-06, |
|
"loss": 9.3203, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.004686307988520362, |
|
"eval_accuracy": 0.08527115019519801, |
|
"eval_loss": 9.234375, |
|
"eval_runtime": 505.6873, |
|
"eval_samples_per_second": 66.774, |
|
"eval_steps_per_second": 2.782, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.004722635957423621, |
|
"grad_norm": 2.3074684143066406, |
|
"learning_rate": 9.999589493951393e-06, |
|
"loss": 9.25, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.004722635957423621, |
|
"eval_accuracy": 0.08533631414244955, |
|
"eval_loss": 9.2265625, |
|
"eval_runtime": 504.004, |
|
"eval_samples_per_second": 66.997, |
|
"eval_steps_per_second": 2.792, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.004758963926326879, |
|
"grad_norm": 1.86430025100708, |
|
"learning_rate": 9.999585861154503e-06, |
|
"loss": 9.2422, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.004758963926326879, |
|
"eval_accuracy": 0.08537788474584947, |
|
"eval_loss": 9.21875, |
|
"eval_runtime": 502.5558, |
|
"eval_samples_per_second": 67.191, |
|
"eval_steps_per_second": 2.8, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.0047952918952301376, |
|
"grad_norm": 1.7045161724090576, |
|
"learning_rate": 9.999582228357613e-06, |
|
"loss": 9.1641, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.0047952918952301376, |
|
"eval_accuracy": 0.08549750155173277, |
|
"eval_loss": 9.2109375, |
|
"eval_runtime": 501.7481, |
|
"eval_samples_per_second": 67.299, |
|
"eval_steps_per_second": 2.804, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.0048316198641333965, |
|
"grad_norm": 1.769091248512268, |
|
"learning_rate": 9.999578595560723e-06, |
|
"loss": 9.2109, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.0048316198641333965, |
|
"eval_accuracy": 0.08575407554750815, |
|
"eval_loss": 9.2109375, |
|
"eval_runtime": 503.2243, |
|
"eval_samples_per_second": 67.101, |
|
"eval_steps_per_second": 2.796, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.004867947833036655, |
|
"grad_norm": 1.7396882772445679, |
|
"learning_rate": 9.999574962763834e-06, |
|
"loss": 9.2422, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.004867947833036655, |
|
"eval_accuracy": 0.08600703093226333, |
|
"eval_loss": 9.203125, |
|
"eval_runtime": 505.0001, |
|
"eval_samples_per_second": 66.865, |
|
"eval_steps_per_second": 2.786, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.004904275801939913, |
|
"grad_norm": 1.5990197658538818, |
|
"learning_rate": 9.999571329966944e-06, |
|
"loss": 9.2188, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.004904275801939913, |
|
"eval_accuracy": 0.08614508817990553, |
|
"eval_loss": 9.1953125, |
|
"eval_runtime": 503.7618, |
|
"eval_samples_per_second": 67.03, |
|
"eval_steps_per_second": 2.793, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.004940603770843172, |
|
"grad_norm": 1.5657048225402832, |
|
"learning_rate": 9.999567697170052e-06, |
|
"loss": 9.3047, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.004940603770843172, |
|
"eval_accuracy": 0.08612786359144943, |
|
"eval_loss": 9.1875, |
|
"eval_runtime": 504.5155, |
|
"eval_samples_per_second": 66.93, |
|
"eval_steps_per_second": 2.789, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.004976931739746431, |
|
"grad_norm": 1.7186903953552246, |
|
"learning_rate": 9.999564064373162e-06, |
|
"loss": 9.1641, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.004976931739746431, |
|
"eval_accuracy": 0.08613209012912101, |
|
"eval_loss": 9.1875, |
|
"eval_runtime": 504.0325, |
|
"eval_samples_per_second": 66.994, |
|
"eval_steps_per_second": 2.791, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.005013259708649689, |
|
"grad_norm": 1.6939176321029663, |
|
"learning_rate": 9.999560431576272e-06, |
|
"loss": 9.2188, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.005013259708649689, |
|
"eval_accuracy": 0.08593219805636586, |
|
"eval_loss": 9.1796875, |
|
"eval_runtime": 502.3491, |
|
"eval_samples_per_second": 67.218, |
|
"eval_steps_per_second": 2.801, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.005049587677552948, |
|
"grad_norm": 1.617654800415039, |
|
"learning_rate": 9.99955679877938e-06, |
|
"loss": 9.2422, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.005049587677552948, |
|
"eval_accuracy": 0.08563775891487524, |
|
"eval_loss": 9.171875, |
|
"eval_runtime": 503.9529, |
|
"eval_samples_per_second": 67.004, |
|
"eval_steps_per_second": 2.792, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.005085915646456207, |
|
"grad_norm": 1.570475697517395, |
|
"learning_rate": 9.99955316598249e-06, |
|
"loss": 9.2422, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.005085915646456207, |
|
"eval_accuracy": 0.0854915380807715, |
|
"eval_loss": 9.171875, |
|
"eval_runtime": 502.4421, |
|
"eval_samples_per_second": 67.206, |
|
"eval_steps_per_second": 2.8, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.005122243615359465, |
|
"grad_norm": 1.7714993953704834, |
|
"learning_rate": 9.9995495331856e-06, |
|
"loss": 9.1484, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.005122243615359465, |
|
"eval_accuracy": 0.08518311662629889, |
|
"eval_loss": 9.1640625, |
|
"eval_runtime": 503.434, |
|
"eval_samples_per_second": 67.073, |
|
"eval_steps_per_second": 2.795, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.005158571584262724, |
|
"grad_norm": 1.7324341535568237, |
|
"learning_rate": 9.999545900388709e-06, |
|
"loss": 9.2422, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.005158571584262724, |
|
"eval_accuracy": 0.08514038806737252, |
|
"eval_loss": 9.15625, |
|
"eval_runtime": 502.9892, |
|
"eval_samples_per_second": 67.133, |
|
"eval_steps_per_second": 2.797, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.005194899553165983, |
|
"grad_norm": 1.5842994451522827, |
|
"learning_rate": 9.99954226759182e-06, |
|
"loss": 9.1953, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.005194899553165983, |
|
"eval_accuracy": 0.0852237898141658, |
|
"eval_loss": 9.1484375, |
|
"eval_runtime": 502.9366, |
|
"eval_samples_per_second": 67.14, |
|
"eval_steps_per_second": 2.798, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.005231227522069241, |
|
"grad_norm": 1.8025195598602295, |
|
"learning_rate": 9.99953863479493e-06, |
|
"loss": 9.1641, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.005231227522069241, |
|
"eval_accuracy": 0.08531526830075614, |
|
"eval_loss": 9.1484375, |
|
"eval_runtime": 503.3947, |
|
"eval_samples_per_second": 67.079, |
|
"eval_steps_per_second": 2.795, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.0052675554909725, |
|
"grad_norm": 1.6925685405731201, |
|
"learning_rate": 9.99953500199804e-06, |
|
"loss": 9.1875, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.0052675554909725, |
|
"eval_accuracy": 0.0853937197876738, |
|
"eval_loss": 9.140625, |
|
"eval_runtime": 503.2502, |
|
"eval_samples_per_second": 67.098, |
|
"eval_steps_per_second": 2.796, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.005303883459875759, |
|
"grad_norm": 1.6498448848724365, |
|
"learning_rate": 9.99953136920115e-06, |
|
"loss": 9.1172, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.005303883459875759, |
|
"eval_accuracy": 0.0855044782337797, |
|
"eval_loss": 9.1328125, |
|
"eval_runtime": 501.7641, |
|
"eval_samples_per_second": 67.297, |
|
"eval_steps_per_second": 2.804, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.005340211428779017, |
|
"grad_norm": 1.6799402236938477, |
|
"learning_rate": 9.999527736404258e-06, |
|
"loss": 9.1094, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.005340211428779017, |
|
"eval_accuracy": 0.0855554572258321, |
|
"eval_loss": 9.1328125, |
|
"eval_runtime": 501.8847, |
|
"eval_samples_per_second": 67.28, |
|
"eval_steps_per_second": 2.803, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.005376539397682276, |
|
"grad_norm": 1.672473669052124, |
|
"learning_rate": 9.999524103607368e-06, |
|
"loss": 9.1328, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.005376539397682276, |
|
"eval_accuracy": 0.0858869798930576, |
|
"eval_loss": 9.125, |
|
"eval_runtime": 502.0405, |
|
"eval_samples_per_second": 67.26, |
|
"eval_steps_per_second": 2.803, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.0054128673665855345, |
|
"grad_norm": 1.6717029809951782, |
|
"learning_rate": 9.999520470810478e-06, |
|
"loss": 9.1641, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.0054128673665855345, |
|
"eval_accuracy": 0.0863320400996525, |
|
"eval_loss": 9.1171875, |
|
"eval_runtime": 501.7116, |
|
"eval_samples_per_second": 67.304, |
|
"eval_steps_per_second": 2.804, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.0054491953354887926, |
|
"grad_norm": 1.5716333389282227, |
|
"learning_rate": 9.999516838013588e-06, |
|
"loss": 9.1641, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0054491953354887926, |
|
"eval_accuracy": 0.08683835615359713, |
|
"eval_loss": 9.109375, |
|
"eval_runtime": 502.3324, |
|
"eval_samples_per_second": 67.22, |
|
"eval_steps_per_second": 2.801, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.0054855233043920515, |
|
"grad_norm": 1.624820351600647, |
|
"learning_rate": 9.999513205216698e-06, |
|
"loss": 9.1875, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.0054855233043920515, |
|
"eval_accuracy": 0.08733792711660013, |
|
"eval_loss": 9.109375, |
|
"eval_runtime": 502.804, |
|
"eval_samples_per_second": 67.157, |
|
"eval_steps_per_second": 2.798, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.00552185127329531, |
|
"grad_norm": 1.614936113357544, |
|
"learning_rate": 9.999509572419807e-06, |
|
"loss": 9.2031, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.00552185127329531, |
|
"eval_accuracy": 0.08752846869847912, |
|
"eval_loss": 9.1015625, |
|
"eval_runtime": 502.8248, |
|
"eval_samples_per_second": 67.155, |
|
"eval_steps_per_second": 2.798, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.005558179242198568, |
|
"grad_norm": 1.8622063398361206, |
|
"learning_rate": 9.999505939622917e-06, |
|
"loss": 9.0703, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.005558179242198568, |
|
"eval_accuracy": 0.08799966975108385, |
|
"eval_loss": 9.09375, |
|
"eval_runtime": 501.9847, |
|
"eval_samples_per_second": 67.267, |
|
"eval_steps_per_second": 2.803, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.005594507211101827, |
|
"grad_norm": 1.615602731704712, |
|
"learning_rate": 9.999502306826027e-06, |
|
"loss": 9.1484, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.005594507211101827, |
|
"eval_accuracy": 0.08840796486971365, |
|
"eval_loss": 9.0859375, |
|
"eval_runtime": 501.6895, |
|
"eval_samples_per_second": 67.307, |
|
"eval_steps_per_second": 2.805, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.005630835180005086, |
|
"grad_norm": 1.661203384399414, |
|
"learning_rate": 9.999498674029135e-06, |
|
"loss": 9.0625, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.005630835180005086, |
|
"eval_accuracy": 0.08875231189439468, |
|
"eval_loss": 9.0859375, |
|
"eval_runtime": 501.9752, |
|
"eval_samples_per_second": 67.268, |
|
"eval_steps_per_second": 2.803, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.005667163148908344, |
|
"grad_norm": 1.7897155284881592, |
|
"learning_rate": 9.999495041232245e-06, |
|
"loss": 9.0781, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.005667163148908344, |
|
"eval_accuracy": 0.08888113444671336, |
|
"eval_loss": 9.078125, |
|
"eval_runtime": 502.8161, |
|
"eval_samples_per_second": 67.156, |
|
"eval_steps_per_second": 2.798, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.005703491117811603, |
|
"grad_norm": 1.9064967632293701, |
|
"learning_rate": 9.999491408435355e-06, |
|
"loss": 9.0234, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.005703491117811603, |
|
"eval_accuracy": 0.08920223551420071, |
|
"eval_loss": 9.0703125, |
|
"eval_runtime": 502.0043, |
|
"eval_samples_per_second": 67.264, |
|
"eval_steps_per_second": 2.803, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.005739819086714862, |
|
"grad_norm": 1.635931134223938, |
|
"learning_rate": 9.999487775638464e-06, |
|
"loss": 9.0781, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.005739819086714862, |
|
"eval_accuracy": 0.08936993642332029, |
|
"eval_loss": 9.0703125, |
|
"eval_runtime": 503.082, |
|
"eval_samples_per_second": 67.12, |
|
"eval_steps_per_second": 2.797, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.00577614705561812, |
|
"grad_norm": 1.833716869354248, |
|
"learning_rate": 9.999484142841574e-06, |
|
"loss": 9.0, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.00577614705561812, |
|
"eval_accuracy": 0.08952518931053041, |
|
"eval_loss": 9.0625, |
|
"eval_runtime": 501.8316, |
|
"eval_samples_per_second": 67.288, |
|
"eval_steps_per_second": 2.804, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.005812475024521379, |
|
"grad_norm": 1.7018239498138428, |
|
"learning_rate": 9.999480510044684e-06, |
|
"loss": 9.0312, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.005812475024521379, |
|
"eval_accuracy": 0.08957336026043114, |
|
"eval_loss": 9.0546875, |
|
"eval_runtime": 502.712, |
|
"eval_samples_per_second": 67.17, |
|
"eval_steps_per_second": 2.799, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.005848802993424638, |
|
"grad_norm": 1.6031908988952637, |
|
"learning_rate": 9.999476877247794e-06, |
|
"loss": 9.0391, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.005848802993424638, |
|
"eval_accuracy": 0.08980457503017704, |
|
"eval_loss": 9.0546875, |
|
"eval_runtime": 504.1644, |
|
"eval_samples_per_second": 66.976, |
|
"eval_steps_per_second": 2.791, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.005885130962327896, |
|
"grad_norm": 1.8437420129776, |
|
"learning_rate": 9.999473244450904e-06, |
|
"loss": 9.0469, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.005885130962327896, |
|
"eval_accuracy": 0.09014976157261477, |
|
"eval_loss": 9.046875, |
|
"eval_runtime": 504.1854, |
|
"eval_samples_per_second": 66.973, |
|
"eval_steps_per_second": 2.791, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.005921458931231155, |
|
"grad_norm": 1.765324354171753, |
|
"learning_rate": 9.999469611654014e-06, |
|
"loss": 9.0859, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.005921458931231155, |
|
"eval_accuracy": 0.09048160267761005, |
|
"eval_loss": 9.0390625, |
|
"eval_runtime": 502.1677, |
|
"eval_samples_per_second": 67.242, |
|
"eval_steps_per_second": 2.802, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.005957786900134414, |
|
"grad_norm": 1.6684796810150146, |
|
"learning_rate": 9.999465978857123e-06, |
|
"loss": 9.0078, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.005957786900134414, |
|
"eval_accuracy": 0.0907586435373156, |
|
"eval_loss": 9.03125, |
|
"eval_runtime": 503.5057, |
|
"eval_samples_per_second": 67.064, |
|
"eval_steps_per_second": 2.794, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.005994114869037672, |
|
"grad_norm": 1.840463399887085, |
|
"learning_rate": 9.999462346060233e-06, |
|
"loss": 9.0156, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.005994114869037672, |
|
"eval_accuracy": 0.09093132365519895, |
|
"eval_loss": 9.03125, |
|
"eval_runtime": 502.0204, |
|
"eval_samples_per_second": 67.262, |
|
"eval_steps_per_second": 2.803, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.006030442837940931, |
|
"grad_norm": 1.782490849494934, |
|
"learning_rate": 9.999458713263343e-06, |
|
"loss": 9.0469, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.006030442837940931, |
|
"eval_accuracy": 0.09093610022174559, |
|
"eval_loss": 9.0234375, |
|
"eval_runtime": 502.0137, |
|
"eval_samples_per_second": 67.263, |
|
"eval_steps_per_second": 2.803, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.0060667708068441895, |
|
"grad_norm": 2.1105000972747803, |
|
"learning_rate": 9.999455080466451e-06, |
|
"loss": 8.9219, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.0060667708068441895, |
|
"eval_accuracy": 0.09079144262760257, |
|
"eval_loss": 9.0234375, |
|
"eval_runtime": 503.1259, |
|
"eval_samples_per_second": 67.114, |
|
"eval_steps_per_second": 2.797, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.0061030987757474476, |
|
"grad_norm": 1.6616843938827515, |
|
"learning_rate": 9.999451447669561e-06, |
|
"loss": 9.0312, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.0061030987757474476, |
|
"eval_accuracy": 0.09065688819542793, |
|
"eval_loss": 9.015625, |
|
"eval_runtime": 502.4938, |
|
"eval_samples_per_second": 67.199, |
|
"eval_steps_per_second": 2.8, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.0061394267446507065, |
|
"grad_norm": 1.6470061540603638, |
|
"learning_rate": 9.999447814872671e-06, |
|
"loss": 9.0938, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.0061394267446507065, |
|
"eval_accuracy": 0.09056121212005416, |
|
"eval_loss": 9.0078125, |
|
"eval_runtime": 503.0903, |
|
"eval_samples_per_second": 67.119, |
|
"eval_steps_per_second": 2.797, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.006175754713553965, |
|
"grad_norm": 1.594428539276123, |
|
"learning_rate": 9.999444182075781e-06, |
|
"loss": 9.0156, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.006175754713553965, |
|
"eval_accuracy": 0.09015482762804303, |
|
"eval_loss": 9.0, |
|
"eval_runtime": 502.1605, |
|
"eval_samples_per_second": 67.243, |
|
"eval_steps_per_second": 2.802, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.006212082682457223, |
|
"grad_norm": 1.5892345905303955, |
|
"learning_rate": 9.99944054927889e-06, |
|
"loss": 9.0312, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.006212082682457223, |
|
"eval_accuracy": 0.08973292653197733, |
|
"eval_loss": 9.0, |
|
"eval_runtime": 500.8624, |
|
"eval_samples_per_second": 67.418, |
|
"eval_steps_per_second": 2.809, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.006248410651360482, |
|
"grad_norm": 1.5640125274658203, |
|
"learning_rate": 9.999436916482e-06, |
|
"loss": 9.0625, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.006248410651360482, |
|
"eval_accuracy": 0.08926792054143916, |
|
"eval_loss": 8.9921875, |
|
"eval_runtime": 501.8903, |
|
"eval_samples_per_second": 67.28, |
|
"eval_steps_per_second": 2.803, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.006284738620263741, |
|
"grad_norm": 2.097386121749878, |
|
"learning_rate": 9.99943328368511e-06, |
|
"loss": 8.9844, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.006284738620263741, |
|
"eval_accuracy": 0.08908942169703535, |
|
"eval_loss": 8.984375, |
|
"eval_runtime": 503.348, |
|
"eval_samples_per_second": 67.085, |
|
"eval_steps_per_second": 2.795, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.006321066589167, |
|
"grad_norm": 1.6901289224624634, |
|
"learning_rate": 9.99942965088822e-06, |
|
"loss": 9.0703, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.006321066589167, |
|
"eval_accuracy": 0.08940612253352216, |
|
"eval_loss": 8.984375, |
|
"eval_runtime": 506.45, |
|
"eval_samples_per_second": 66.674, |
|
"eval_steps_per_second": 2.778, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.006357394558070258, |
|
"grad_norm": 1.6152195930480957, |
|
"learning_rate": 9.99942601809133e-06, |
|
"loss": 8.9609, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.006357394558070258, |
|
"eval_accuracy": 0.08984990898903795, |
|
"eval_loss": 8.9765625, |
|
"eval_runtime": 504.771, |
|
"eval_samples_per_second": 66.896, |
|
"eval_steps_per_second": 2.787, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.006393722526973517, |
|
"grad_norm": 1.6358169317245483, |
|
"learning_rate": 9.99942238529444e-06, |
|
"loss": 8.9922, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.006393722526973517, |
|
"eval_accuracy": 0.09047048630455602, |
|
"eval_loss": 8.9765625, |
|
"eval_runtime": 504.0679, |
|
"eval_samples_per_second": 66.989, |
|
"eval_steps_per_second": 2.791, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.006430050495876776, |
|
"grad_norm": 1.662377953529358, |
|
"learning_rate": 9.999418752497549e-06, |
|
"loss": 9.0234, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.006430050495876776, |
|
"eval_accuracy": 0.09099935354237847, |
|
"eval_loss": 8.96875, |
|
"eval_runtime": 504.1525, |
|
"eval_samples_per_second": 66.978, |
|
"eval_steps_per_second": 2.791, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.006466378464780034, |
|
"grad_norm": 1.6537806987762451, |
|
"learning_rate": 9.999415119700659e-06, |
|
"loss": 9.0234, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.006466378464780034, |
|
"eval_accuracy": 0.0914514483287966, |
|
"eval_loss": 8.9609375, |
|
"eval_runtime": 502.7884, |
|
"eval_samples_per_second": 67.159, |
|
"eval_steps_per_second": 2.798, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.006502706433683293, |
|
"grad_norm": 1.7109649181365967, |
|
"learning_rate": 9.999411486903769e-06, |
|
"loss": 8.9219, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.006502706433683293, |
|
"eval_accuracy": 0.09188437895125184, |
|
"eval_loss": 8.953125, |
|
"eval_runtime": 501.2279, |
|
"eval_samples_per_second": 67.369, |
|
"eval_steps_per_second": 2.807, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.006539034402586552, |
|
"grad_norm": 1.64595365524292, |
|
"learning_rate": 9.999407854106877e-06, |
|
"loss": 9.0234, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.006539034402586552, |
|
"eval_accuracy": 0.09202229145445322, |
|
"eval_loss": 8.953125, |
|
"eval_runtime": 501.0383, |
|
"eval_samples_per_second": 67.394, |
|
"eval_steps_per_second": 2.808, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.00657536237148981, |
|
"grad_norm": 1.5911909341812134, |
|
"learning_rate": 9.999404221309987e-06, |
|
"loss": 8.9375, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.00657536237148981, |
|
"eval_accuracy": 0.09211403048103702, |
|
"eval_loss": 8.9453125, |
|
"eval_runtime": 503.6267, |
|
"eval_samples_per_second": 67.048, |
|
"eval_steps_per_second": 2.794, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.006611690340393069, |
|
"grad_norm": 1.4985125064849854, |
|
"learning_rate": 9.999400588513097e-06, |
|
"loss": 8.9688, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.006611690340393069, |
|
"eval_accuracy": 0.09187829968473793, |
|
"eval_loss": 8.9375, |
|
"eval_runtime": 504.6287, |
|
"eval_samples_per_second": 66.915, |
|
"eval_steps_per_second": 2.788, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.006648018309296328, |
|
"grad_norm": 1.7256035804748535, |
|
"learning_rate": 9.999396955716206e-06, |
|
"loss": 8.9375, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.006648018309296328, |
|
"eval_accuracy": 0.09130482221025861, |
|
"eval_loss": 8.9375, |
|
"eval_runtime": 502.7855, |
|
"eval_samples_per_second": 67.16, |
|
"eval_steps_per_second": 2.798, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.006684346278199586, |
|
"grad_norm": 1.5093938112258911, |
|
"learning_rate": 9.999393322919316e-06, |
|
"loss": 9.0, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.006684346278199586, |
|
"eval_accuracy": 0.09121345951922091, |
|
"eval_loss": 8.9296875, |
|
"eval_runtime": 502.1658, |
|
"eval_samples_per_second": 67.243, |
|
"eval_steps_per_second": 2.802, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.0067206742471028445, |
|
"grad_norm": 1.540442943572998, |
|
"learning_rate": 9.999389690122426e-06, |
|
"loss": 8.9375, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.0067206742471028445, |
|
"eval_accuracy": 0.09127399164436661, |
|
"eval_loss": 8.921875, |
|
"eval_runtime": 502.1504, |
|
"eval_samples_per_second": 67.245, |
|
"eval_steps_per_second": 2.802, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.006757002216006103, |
|
"grad_norm": 1.5126348733901978, |
|
"learning_rate": 9.999386057325536e-06, |
|
"loss": 8.9609, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.006757002216006103, |
|
"eval_accuracy": 0.09126880979338571, |
|
"eval_loss": 8.921875, |
|
"eval_runtime": 500.7274, |
|
"eval_samples_per_second": 67.436, |
|
"eval_steps_per_second": 2.81, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.0067933301849093615, |
|
"grad_norm": 1.6009377241134644, |
|
"learning_rate": 9.999382424528646e-06, |
|
"loss": 8.9688, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.0067933301849093615, |
|
"eval_accuracy": 0.09167533902983765, |
|
"eval_loss": 8.9140625, |
|
"eval_runtime": 498.1171, |
|
"eval_samples_per_second": 67.789, |
|
"eval_steps_per_second": 2.825, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.0067933301849093615, |
|
"step": 187, |
|
"total_flos": 1043124562427904.0, |
|
"train_loss": 9.606408756684491, |
|
"train_runtime": 94484.0244, |
|
"train_samples_per_second": 699.211, |
|
"train_steps_per_second": 29.134 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 2752700, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 100, |
|
"save_steps": 1000000, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": false, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 1043124562427904.0, |
|
"train_batch_size": 24, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|