aplusb / checkpoint-16000 /trainer_state.json
Nefertury's picture
Upload 8 files
20d1c28
{
"best_metric": null,
"best_model_checkpoint": null,
"epoch": 9.893337455557273,
"eval_steps": 200,
"global_step": 16000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.12,
"learning_rate": 0.0002981331673926571,
"loss": 2.3038,
"step": 200
},
{
"epoch": 0.12,
"eval_loss": 1.589147925376892,
"eval_runtime": 2.1684,
"eval_samples_per_second": 461.163,
"eval_steps_per_second": 57.645,
"step": 200
},
{
"epoch": 0.25,
"learning_rate": 0.00029439950217797134,
"loss": 1.5398,
"step": 400
},
{
"epoch": 0.25,
"eval_loss": 1.4633430242538452,
"eval_runtime": 2.1071,
"eval_samples_per_second": 474.58,
"eval_steps_per_second": 59.323,
"step": 400
},
{
"epoch": 0.37,
"learning_rate": 0.00029066583696328563,
"loss": 1.4615,
"step": 600
},
{
"epoch": 0.37,
"eval_loss": 1.4296711683273315,
"eval_runtime": 2.4812,
"eval_samples_per_second": 403.032,
"eval_steps_per_second": 50.379,
"step": 600
},
{
"epoch": 0.49,
"learning_rate": 0.0002869321717485998,
"loss": 1.4244,
"step": 800
},
{
"epoch": 0.49,
"eval_loss": 1.3793567419052124,
"eval_runtime": 2.9698,
"eval_samples_per_second": 336.72,
"eval_steps_per_second": 42.09,
"step": 800
},
{
"epoch": 0.62,
"learning_rate": 0.0002831985065339141,
"loss": 1.3921,
"step": 1000
},
{
"epoch": 0.62,
"eval_loss": 1.3315461874008179,
"eval_runtime": 2.7793,
"eval_samples_per_second": 359.806,
"eval_steps_per_second": 44.976,
"step": 1000
},
{
"epoch": 0.74,
"learning_rate": 0.00027946484131922836,
"loss": 1.0958,
"step": 1200
},
{
"epoch": 0.74,
"eval_loss": 0.7548955082893372,
"eval_runtime": 2.7656,
"eval_samples_per_second": 361.592,
"eval_steps_per_second": 45.199,
"step": 1200
},
{
"epoch": 0.87,
"learning_rate": 0.0002757311761045426,
"loss": 0.6312,
"step": 1400
},
{
"epoch": 0.87,
"eval_loss": 0.3990221917629242,
"eval_runtime": 2.1502,
"eval_samples_per_second": 465.071,
"eval_steps_per_second": 58.134,
"step": 1400
},
{
"epoch": 0.99,
"learning_rate": 0.00027199751088985685,
"loss": 0.4093,
"step": 1600
},
{
"epoch": 0.99,
"eval_loss": 0.26113563776016235,
"eval_runtime": 3.5103,
"eval_samples_per_second": 284.875,
"eval_steps_per_second": 35.609,
"step": 1600
},
{
"epoch": 1.11,
"learning_rate": 0.00026826384567517114,
"loss": 0.2959,
"step": 1800
},
{
"epoch": 1.11,
"eval_loss": 0.2783801257610321,
"eval_runtime": 2.8441,
"eval_samples_per_second": 351.603,
"eval_steps_per_second": 43.95,
"step": 1800
},
{
"epoch": 1.24,
"learning_rate": 0.00026453018046048533,
"loss": 0.2589,
"step": 2000
},
{
"epoch": 1.24,
"eval_loss": 0.20705343782901764,
"eval_runtime": 2.7524,
"eval_samples_per_second": 363.322,
"eval_steps_per_second": 45.415,
"step": 2000
},
{
"epoch": 1.36,
"learning_rate": 0.0002607965152457996,
"loss": 0.2246,
"step": 2200
},
{
"epoch": 1.36,
"eval_loss": 0.15551678836345673,
"eval_runtime": 2.15,
"eval_samples_per_second": 465.117,
"eval_steps_per_second": 58.14,
"step": 2200
},
{
"epoch": 1.48,
"learning_rate": 0.00025706285003111387,
"loss": 0.1991,
"step": 2400
},
{
"epoch": 1.48,
"eval_loss": 0.15825262665748596,
"eval_runtime": 2.1344,
"eval_samples_per_second": 468.515,
"eval_steps_per_second": 58.564,
"step": 2400
},
{
"epoch": 1.61,
"learning_rate": 0.0002533291848164281,
"loss": 0.1784,
"step": 2600
},
{
"epoch": 1.61,
"eval_loss": 0.12008943408727646,
"eval_runtime": 2.1414,
"eval_samples_per_second": 466.985,
"eval_steps_per_second": 58.373,
"step": 2600
},
{
"epoch": 1.73,
"learning_rate": 0.00024959551960174235,
"loss": 0.1598,
"step": 2800
},
{
"epoch": 1.73,
"eval_loss": 0.12511701881885529,
"eval_runtime": 2.55,
"eval_samples_per_second": 392.155,
"eval_steps_per_second": 49.019,
"step": 2800
},
{
"epoch": 1.86,
"learning_rate": 0.0002458618543870566,
"loss": 0.164,
"step": 3000
},
{
"epoch": 1.86,
"eval_loss": 0.11049681156873703,
"eval_runtime": 2.9765,
"eval_samples_per_second": 335.97,
"eval_steps_per_second": 41.996,
"step": 3000
},
{
"epoch": 1.98,
"learning_rate": 0.00024212818917237084,
"loss": 0.1475,
"step": 3200
},
{
"epoch": 1.98,
"eval_loss": 0.0954003781080246,
"eval_runtime": 2.8437,
"eval_samples_per_second": 351.659,
"eval_steps_per_second": 43.957,
"step": 3200
},
{
"epoch": 2.1,
"learning_rate": 0.0002383945239576851,
"loss": 0.1388,
"step": 3400
},
{
"epoch": 2.1,
"eval_loss": 0.10116879642009735,
"eval_runtime": 2.7628,
"eval_samples_per_second": 361.954,
"eval_steps_per_second": 45.244,
"step": 3400
},
{
"epoch": 2.23,
"learning_rate": 0.00023466085874299935,
"loss": 0.1346,
"step": 3600
},
{
"epoch": 2.23,
"eval_loss": 0.10693109035491943,
"eval_runtime": 3.1697,
"eval_samples_per_second": 315.491,
"eval_steps_per_second": 39.436,
"step": 3600
},
{
"epoch": 2.35,
"learning_rate": 0.00023092719352831362,
"loss": 0.1232,
"step": 3800
},
{
"epoch": 2.35,
"eval_loss": 0.09901304543018341,
"eval_runtime": 2.1178,
"eval_samples_per_second": 472.183,
"eval_steps_per_second": 59.023,
"step": 3800
},
{
"epoch": 2.47,
"learning_rate": 0.00022719352831362786,
"loss": 0.1187,
"step": 4000
},
{
"epoch": 2.47,
"eval_loss": 0.11418598890304565,
"eval_runtime": 2.1348,
"eval_samples_per_second": 468.423,
"eval_steps_per_second": 58.553,
"step": 4000
},
{
"epoch": 2.6,
"learning_rate": 0.0002234598630989421,
"loss": 0.1133,
"step": 4200
},
{
"epoch": 2.6,
"eval_loss": 0.0984039306640625,
"eval_runtime": 2.1382,
"eval_samples_per_second": 467.676,
"eval_steps_per_second": 58.459,
"step": 4200
},
{
"epoch": 2.72,
"learning_rate": 0.00021972619788425635,
"loss": 0.1088,
"step": 4400
},
{
"epoch": 2.72,
"eval_loss": 0.07466612011194229,
"eval_runtime": 2.8862,
"eval_samples_per_second": 346.477,
"eval_steps_per_second": 43.31,
"step": 4400
},
{
"epoch": 2.84,
"learning_rate": 0.00021599253266957062,
"loss": 0.1025,
"step": 4600
},
{
"epoch": 2.84,
"eval_loss": 0.1227998435497284,
"eval_runtime": 2.8738,
"eval_samples_per_second": 347.966,
"eval_steps_per_second": 43.496,
"step": 4600
},
{
"epoch": 2.97,
"learning_rate": 0.00021225886745488486,
"loss": 0.0971,
"step": 4800
},
{
"epoch": 2.97,
"eval_loss": 0.07324225455522537,
"eval_runtime": 2.2831,
"eval_samples_per_second": 437.994,
"eval_steps_per_second": 54.749,
"step": 4800
},
{
"epoch": 3.09,
"learning_rate": 0.00020852520224019913,
"loss": 0.0853,
"step": 5000
},
{
"epoch": 3.09,
"eval_loss": 0.07788190990686417,
"eval_runtime": 2.1358,
"eval_samples_per_second": 468.199,
"eval_steps_per_second": 58.525,
"step": 5000
},
{
"epoch": 3.22,
"learning_rate": 0.00020479153702551337,
"loss": 0.0865,
"step": 5200
},
{
"epoch": 3.22,
"eval_loss": 0.06575259566307068,
"eval_runtime": 2.1474,
"eval_samples_per_second": 465.679,
"eval_steps_per_second": 58.21,
"step": 5200
},
{
"epoch": 3.34,
"learning_rate": 0.0002010578718108276,
"loss": 0.0768,
"step": 5400
},
{
"epoch": 3.34,
"eval_loss": 0.08183684200048447,
"eval_runtime": 2.1211,
"eval_samples_per_second": 471.453,
"eval_steps_per_second": 58.932,
"step": 5400
},
{
"epoch": 3.46,
"learning_rate": 0.00019732420659614186,
"loss": 0.0738,
"step": 5600
},
{
"epoch": 3.46,
"eval_loss": 0.04662672430276871,
"eval_runtime": 2.7913,
"eval_samples_per_second": 358.253,
"eval_steps_per_second": 44.782,
"step": 5600
},
{
"epoch": 3.59,
"learning_rate": 0.0001935905413814561,
"loss": 0.0622,
"step": 5800
},
{
"epoch": 3.59,
"eval_loss": 0.0433196946978569,
"eval_runtime": 3.1597,
"eval_samples_per_second": 316.49,
"eval_steps_per_second": 39.561,
"step": 5800
},
{
"epoch": 3.71,
"learning_rate": 0.00018985687616677037,
"loss": 0.0671,
"step": 6000
},
{
"epoch": 3.71,
"eval_loss": 0.038382936269044876,
"eval_runtime": 2.1009,
"eval_samples_per_second": 475.976,
"eval_steps_per_second": 59.497,
"step": 6000
},
{
"epoch": 3.83,
"learning_rate": 0.0001861232109520846,
"loss": 0.0545,
"step": 6200
},
{
"epoch": 3.83,
"eval_loss": 0.04082392156124115,
"eval_runtime": 2.1346,
"eval_samples_per_second": 468.481,
"eval_steps_per_second": 58.56,
"step": 6200
},
{
"epoch": 3.96,
"learning_rate": 0.00018238954573739888,
"loss": 0.0564,
"step": 6400
},
{
"epoch": 3.96,
"eval_loss": 0.043197453022003174,
"eval_runtime": 2.1169,
"eval_samples_per_second": 472.389,
"eval_steps_per_second": 59.049,
"step": 6400
},
{
"epoch": 4.08,
"learning_rate": 0.0001786558805227131,
"loss": 0.0523,
"step": 6600
},
{
"epoch": 4.08,
"eval_loss": 0.03342806547880173,
"eval_runtime": 2.4926,
"eval_samples_per_second": 401.182,
"eval_steps_per_second": 50.148,
"step": 6600
},
{
"epoch": 4.2,
"learning_rate": 0.00017492221530802736,
"loss": 0.0456,
"step": 6800
},
{
"epoch": 4.2,
"eval_loss": 0.02744474820792675,
"eval_runtime": 3.0,
"eval_samples_per_second": 333.335,
"eval_steps_per_second": 41.667,
"step": 6800
},
{
"epoch": 4.33,
"learning_rate": 0.0001711885500933416,
"loss": 0.0442,
"step": 7000
},
{
"epoch": 4.33,
"eval_loss": 0.024560416117310524,
"eval_runtime": 2.6752,
"eval_samples_per_second": 373.806,
"eval_steps_per_second": 46.726,
"step": 7000
},
{
"epoch": 4.45,
"learning_rate": 0.00016745488487865588,
"loss": 0.0383,
"step": 7200
},
{
"epoch": 4.45,
"eval_loss": 0.018605533987283707,
"eval_runtime": 2.1117,
"eval_samples_per_second": 473.559,
"eval_steps_per_second": 59.195,
"step": 7200
},
{
"epoch": 4.58,
"learning_rate": 0.00016372121966397012,
"loss": 0.0348,
"step": 7400
},
{
"epoch": 4.58,
"eval_loss": 0.01473915483802557,
"eval_runtime": 2.1223,
"eval_samples_per_second": 471.193,
"eval_steps_per_second": 58.899,
"step": 7400
},
{
"epoch": 4.7,
"learning_rate": 0.0001599875544492844,
"loss": 0.0299,
"step": 7600
},
{
"epoch": 4.7,
"eval_loss": 0.025838036090135574,
"eval_runtime": 2.1138,
"eval_samples_per_second": 473.088,
"eval_steps_per_second": 59.136,
"step": 7600
},
{
"epoch": 4.82,
"learning_rate": 0.0001562538892345986,
"loss": 0.0268,
"step": 7800
},
{
"epoch": 4.82,
"eval_loss": 0.01688736118376255,
"eval_runtime": 2.1658,
"eval_samples_per_second": 461.718,
"eval_steps_per_second": 57.715,
"step": 7800
},
{
"epoch": 4.95,
"learning_rate": 0.00015252022401991287,
"loss": 0.0272,
"step": 8000
},
{
"epoch": 4.95,
"eval_loss": 0.020514091476798058,
"eval_runtime": 2.1415,
"eval_samples_per_second": 466.966,
"eval_steps_per_second": 58.371,
"step": 8000
},
{
"epoch": 5.07,
"learning_rate": 0.00014878655880522712,
"loss": 0.0277,
"step": 8200
},
{
"epoch": 5.07,
"eval_loss": 0.018993763253092766,
"eval_runtime": 2.3074,
"eval_samples_per_second": 433.383,
"eval_steps_per_second": 54.173,
"step": 8200
},
{
"epoch": 5.19,
"learning_rate": 0.00014505289359054139,
"loss": 0.0253,
"step": 8400
},
{
"epoch": 5.19,
"eval_loss": 0.0132982786744833,
"eval_runtime": 2.7723,
"eval_samples_per_second": 360.706,
"eval_steps_per_second": 45.088,
"step": 8400
},
{
"epoch": 5.32,
"learning_rate": 0.00014131922837585563,
"loss": 0.0208,
"step": 8600
},
{
"epoch": 5.32,
"eval_loss": 0.011603164486587048,
"eval_runtime": 2.2147,
"eval_samples_per_second": 451.518,
"eval_steps_per_second": 56.44,
"step": 8600
},
{
"epoch": 5.44,
"learning_rate": 0.00013758556316116987,
"loss": 0.019,
"step": 8800
},
{
"epoch": 5.44,
"eval_loss": 0.007933158427476883,
"eval_runtime": 2.565,
"eval_samples_per_second": 389.858,
"eval_steps_per_second": 48.732,
"step": 8800
},
{
"epoch": 5.57,
"learning_rate": 0.00013385189794648414,
"loss": 0.0179,
"step": 9000
},
{
"epoch": 5.57,
"eval_loss": 0.00808796752244234,
"eval_runtime": 2.157,
"eval_samples_per_second": 463.605,
"eval_steps_per_second": 57.951,
"step": 9000
},
{
"epoch": 5.69,
"learning_rate": 0.00013011823273179835,
"loss": 0.0136,
"step": 9200
},
{
"epoch": 5.69,
"eval_loss": 0.02137412503361702,
"eval_runtime": 2.1642,
"eval_samples_per_second": 462.06,
"eval_steps_per_second": 57.758,
"step": 9200
},
{
"epoch": 5.81,
"learning_rate": 0.00012638456751711262,
"loss": 0.0196,
"step": 9400
},
{
"epoch": 5.81,
"eval_loss": 0.009271830320358276,
"eval_runtime": 2.7483,
"eval_samples_per_second": 363.865,
"eval_steps_per_second": 45.483,
"step": 9400
},
{
"epoch": 5.94,
"learning_rate": 0.00012265090230242687,
"loss": 0.015,
"step": 9600
},
{
"epoch": 5.94,
"eval_loss": 0.011388062499463558,
"eval_runtime": 3.1063,
"eval_samples_per_second": 321.931,
"eval_steps_per_second": 40.241,
"step": 9600
},
{
"epoch": 6.06,
"learning_rate": 0.00011891723708774112,
"loss": 0.0196,
"step": 9800
},
{
"epoch": 6.06,
"eval_loss": 0.009324445389211178,
"eval_runtime": 2.9695,
"eval_samples_per_second": 336.759,
"eval_steps_per_second": 42.095,
"step": 9800
},
{
"epoch": 6.18,
"learning_rate": 0.00011518357187305538,
"loss": 0.0192,
"step": 10000
},
{
"epoch": 6.18,
"eval_loss": 0.008494062349200249,
"eval_runtime": 2.1785,
"eval_samples_per_second": 459.035,
"eval_steps_per_second": 57.379,
"step": 10000
},
{
"epoch": 6.31,
"learning_rate": 0.00011144990665836963,
"loss": 0.0155,
"step": 10200
},
{
"epoch": 6.31,
"eval_loss": 0.005131287965923548,
"eval_runtime": 2.2151,
"eval_samples_per_second": 451.441,
"eval_steps_per_second": 56.43,
"step": 10200
},
{
"epoch": 6.43,
"learning_rate": 0.00010771624144368388,
"loss": 0.0182,
"step": 10400
},
{
"epoch": 6.43,
"eval_loss": 0.01033452432602644,
"eval_runtime": 2.204,
"eval_samples_per_second": 453.729,
"eval_steps_per_second": 56.716,
"step": 10400
},
{
"epoch": 6.55,
"learning_rate": 0.00010398257622899813,
"loss": 0.0149,
"step": 10600
},
{
"epoch": 6.55,
"eval_loss": 0.006081216037273407,
"eval_runtime": 2.6138,
"eval_samples_per_second": 382.582,
"eval_steps_per_second": 47.823,
"step": 10600
},
{
"epoch": 6.68,
"learning_rate": 0.00010024891101431236,
"loss": 0.0155,
"step": 10800
},
{
"epoch": 6.68,
"eval_loss": 0.008235114626586437,
"eval_runtime": 2.9799,
"eval_samples_per_second": 335.587,
"eval_steps_per_second": 41.948,
"step": 10800
},
{
"epoch": 6.8,
"learning_rate": 9.651524579962662e-05,
"loss": 0.0125,
"step": 11000
},
{
"epoch": 6.8,
"eval_loss": 0.0061024767346680164,
"eval_runtime": 3.1763,
"eval_samples_per_second": 314.832,
"eval_steps_per_second": 39.354,
"step": 11000
},
{
"epoch": 6.93,
"learning_rate": 9.278158058494087e-05,
"loss": 0.0126,
"step": 11200
},
{
"epoch": 6.93,
"eval_loss": 0.0077368393540382385,
"eval_runtime": 2.1677,
"eval_samples_per_second": 461.31,
"eval_steps_per_second": 57.664,
"step": 11200
},
{
"epoch": 7.05,
"learning_rate": 8.904791537025512e-05,
"loss": 0.016,
"step": 11400
},
{
"epoch": 7.05,
"eval_loss": 0.01462015975266695,
"eval_runtime": 2.163,
"eval_samples_per_second": 462.313,
"eval_steps_per_second": 57.789,
"step": 11400
},
{
"epoch": 7.17,
"learning_rate": 8.531425015556937e-05,
"loss": 0.0168,
"step": 11600
},
{
"epoch": 7.17,
"eval_loss": 0.013114248402416706,
"eval_runtime": 2.177,
"eval_samples_per_second": 459.355,
"eval_steps_per_second": 57.419,
"step": 11600
},
{
"epoch": 7.3,
"learning_rate": 8.158058494088363e-05,
"loss": 0.0115,
"step": 11800
},
{
"epoch": 7.3,
"eval_loss": 0.0058467877097427845,
"eval_runtime": 2.8432,
"eval_samples_per_second": 351.72,
"eval_steps_per_second": 43.965,
"step": 11800
},
{
"epoch": 7.42,
"learning_rate": 7.784691972619787e-05,
"loss": 0.0109,
"step": 12000
},
{
"epoch": 7.42,
"eval_loss": 0.007328983396291733,
"eval_runtime": 2.9781,
"eval_samples_per_second": 335.785,
"eval_steps_per_second": 41.973,
"step": 12000
},
{
"epoch": 7.54,
"learning_rate": 7.411325451151213e-05,
"loss": 0.01,
"step": 12200
},
{
"epoch": 7.54,
"eval_loss": 0.00543447770178318,
"eval_runtime": 2.13,
"eval_samples_per_second": 469.492,
"eval_steps_per_second": 58.686,
"step": 12200
},
{
"epoch": 7.67,
"learning_rate": 7.037958929682637e-05,
"loss": 0.0085,
"step": 12400
},
{
"epoch": 7.67,
"eval_loss": 0.005294375587254763,
"eval_runtime": 2.1484,
"eval_samples_per_second": 465.459,
"eval_steps_per_second": 58.182,
"step": 12400
},
{
"epoch": 7.79,
"learning_rate": 6.664592408214062e-05,
"loss": 0.0105,
"step": 12600
},
{
"epoch": 7.79,
"eval_loss": 0.0051603252068161964,
"eval_runtime": 2.1621,
"eval_samples_per_second": 462.523,
"eval_steps_per_second": 57.815,
"step": 12600
},
{
"epoch": 7.91,
"learning_rate": 6.291225886745488e-05,
"loss": 0.01,
"step": 12800
},
{
"epoch": 7.91,
"eval_loss": 0.005722519941627979,
"eval_runtime": 2.7684,
"eval_samples_per_second": 361.216,
"eval_steps_per_second": 45.152,
"step": 12800
},
{
"epoch": 8.04,
"learning_rate": 5.917859365276913e-05,
"loss": 0.0071,
"step": 13000
},
{
"epoch": 8.04,
"eval_loss": 0.004564732778817415,
"eval_runtime": 2.7551,
"eval_samples_per_second": 362.961,
"eval_steps_per_second": 45.37,
"step": 13000
},
{
"epoch": 8.16,
"learning_rate": 5.5444928438083385e-05,
"loss": 0.0065,
"step": 13200
},
{
"epoch": 8.16,
"eval_loss": 0.004461783915758133,
"eval_runtime": 3.1705,
"eval_samples_per_second": 315.412,
"eval_steps_per_second": 39.426,
"step": 13200
},
{
"epoch": 8.29,
"learning_rate": 5.171126322339763e-05,
"loss": 0.0075,
"step": 13400
},
{
"epoch": 8.29,
"eval_loss": 0.004132562782615423,
"eval_runtime": 3.5027,
"eval_samples_per_second": 285.498,
"eval_steps_per_second": 35.687,
"step": 13400
},
{
"epoch": 8.41,
"learning_rate": 4.797759800871188e-05,
"loss": 0.0072,
"step": 13600
},
{
"epoch": 8.41,
"eval_loss": 0.004298557061702013,
"eval_runtime": 2.1516,
"eval_samples_per_second": 464.775,
"eval_steps_per_second": 58.097,
"step": 13600
},
{
"epoch": 8.53,
"learning_rate": 4.424393279402613e-05,
"loss": 0.0077,
"step": 13800
},
{
"epoch": 8.53,
"eval_loss": 0.005747557617723942,
"eval_runtime": 2.1174,
"eval_samples_per_second": 472.272,
"eval_steps_per_second": 59.034,
"step": 13800
},
{
"epoch": 8.66,
"learning_rate": 4.051026757934038e-05,
"loss": 0.009,
"step": 14000
},
{
"epoch": 8.66,
"eval_loss": 0.005076244939118624,
"eval_runtime": 2.1715,
"eval_samples_per_second": 460.514,
"eval_steps_per_second": 57.564,
"step": 14000
},
{
"epoch": 8.78,
"learning_rate": 3.677660236465463e-05,
"loss": 0.0066,
"step": 14200
},
{
"epoch": 8.78,
"eval_loss": 0.004328867886215448,
"eval_runtime": 2.1457,
"eval_samples_per_second": 466.038,
"eval_steps_per_second": 58.255,
"step": 14200
},
{
"epoch": 8.9,
"learning_rate": 3.304293714996888e-05,
"loss": 0.0065,
"step": 14400
},
{
"epoch": 8.9,
"eval_loss": 0.004579309374094009,
"eval_runtime": 2.5023,
"eval_samples_per_second": 399.626,
"eval_steps_per_second": 49.953,
"step": 14400
},
{
"epoch": 9.03,
"learning_rate": 2.9309271935283136e-05,
"loss": 0.0047,
"step": 14600
},
{
"epoch": 9.03,
"eval_loss": 0.00406376738101244,
"eval_runtime": 3.0193,
"eval_samples_per_second": 331.204,
"eval_steps_per_second": 41.401,
"step": 14600
},
{
"epoch": 9.15,
"learning_rate": 2.5575606720597382e-05,
"loss": 0.0049,
"step": 14800
},
{
"epoch": 9.15,
"eval_loss": 0.0037133253645151854,
"eval_runtime": 2.5419,
"eval_samples_per_second": 393.406,
"eval_steps_per_second": 49.176,
"step": 14800
},
{
"epoch": 9.28,
"learning_rate": 2.1841941505911635e-05,
"loss": 0.0048,
"step": 15000
},
{
"epoch": 9.28,
"eval_loss": 0.0035180081613361835,
"eval_runtime": 2.1535,
"eval_samples_per_second": 464.362,
"eval_steps_per_second": 58.045,
"step": 15000
},
{
"epoch": 9.4,
"learning_rate": 1.8108276291225884e-05,
"loss": 0.0045,
"step": 15200
},
{
"epoch": 9.4,
"eval_loss": 0.0041992985643446445,
"eval_runtime": 2.1652,
"eval_samples_per_second": 461.858,
"eval_steps_per_second": 57.732,
"step": 15200
},
{
"epoch": 9.52,
"learning_rate": 1.4374611076540135e-05,
"loss": 0.0041,
"step": 15400
},
{
"epoch": 9.52,
"eval_loss": 0.003915323410183191,
"eval_runtime": 2.7057,
"eval_samples_per_second": 369.59,
"eval_steps_per_second": 46.199,
"step": 15400
},
{
"epoch": 9.65,
"learning_rate": 1.0640945861854385e-05,
"loss": 0.0042,
"step": 15600
},
{
"epoch": 9.65,
"eval_loss": 0.0032798268366605043,
"eval_runtime": 3.0263,
"eval_samples_per_second": 330.438,
"eval_steps_per_second": 41.305,
"step": 15600
},
{
"epoch": 9.77,
"learning_rate": 6.907280647168636e-06,
"loss": 0.0041,
"step": 15800
},
{
"epoch": 9.77,
"eval_loss": 0.003197046695277095,
"eval_runtime": 2.2279,
"eval_samples_per_second": 448.855,
"eval_steps_per_second": 56.107,
"step": 15800
},
{
"epoch": 9.89,
"learning_rate": 3.173615432482887e-06,
"loss": 0.0039,
"step": 16000
},
{
"epoch": 9.89,
"eval_loss": 0.003054018598049879,
"eval_runtime": 2.2116,
"eval_samples_per_second": 452.155,
"eval_steps_per_second": 56.519,
"step": 16000
}
],
"logging_steps": 200,
"max_steps": 16170,
"num_train_epochs": 10,
"save_steps": 200,
"total_flos": 6146864391499776.0,
"trial_name": null,
"trial_params": null
}