|
{ |
|
"best_metric": 5.108978484713589e-08, |
|
"best_model_checkpoint": "./outputs_L3_hindi/checkpoint-80000", |
|
"epoch": 9.70439502953017, |
|
"eval_steps": 10000, |
|
"global_step": 80000, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.012130493786912713, |
|
"grad_norm": 5.738617420196533, |
|
"learning_rate": 1.213150551983501e-09, |
|
"loss": 2.8724, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.024260987573825426, |
|
"grad_norm": 5.746348857879639, |
|
"learning_rate": 2.426301103967002e-09, |
|
"loss": 2.8724, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.03639148136073814, |
|
"grad_norm": 5.743950843811035, |
|
"learning_rate": 3.6394516559505032e-09, |
|
"loss": 2.8724, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.04852197514765085, |
|
"grad_norm": 5.745969295501709, |
|
"learning_rate": 4.852602207934004e-09, |
|
"loss": 2.8724, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.060652468934563566, |
|
"grad_norm": 5.757739067077637, |
|
"learning_rate": 6.065752759917506e-09, |
|
"loss": 2.8724, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 0.07278296272147627, |
|
"grad_norm": 5.759668350219727, |
|
"learning_rate": 7.2789033119010064e-09, |
|
"loss": 2.8724, |
|
"step": 600 |
|
}, |
|
{ |
|
"epoch": 0.084913456508389, |
|
"grad_norm": 5.789394855499268, |
|
"learning_rate": 8.492053863884507e-09, |
|
"loss": 2.8724, |
|
"step": 700 |
|
}, |
|
{ |
|
"epoch": 0.0970439502953017, |
|
"grad_norm": 5.801012992858887, |
|
"learning_rate": 9.705204415868008e-09, |
|
"loss": 2.8724, |
|
"step": 800 |
|
}, |
|
{ |
|
"epoch": 0.10917444408221443, |
|
"grad_norm": 5.81521463394165, |
|
"learning_rate": 1.0918354967851511e-08, |
|
"loss": 2.8724, |
|
"step": 900 |
|
}, |
|
{ |
|
"epoch": 0.12130493786912713, |
|
"grad_norm": 5.836797714233398, |
|
"learning_rate": 1.2131505519835012e-08, |
|
"loss": 2.8724, |
|
"step": 1000 |
|
}, |
|
{ |
|
"epoch": 0.13343543165603985, |
|
"grad_norm": 5.866050720214844, |
|
"learning_rate": 1.334465607181851e-08, |
|
"loss": 2.8724, |
|
"step": 1100 |
|
}, |
|
{ |
|
"epoch": 0.14556592544295255, |
|
"grad_norm": 5.896239280700684, |
|
"learning_rate": 1.4557806623802013e-08, |
|
"loss": 2.8724, |
|
"step": 1200 |
|
}, |
|
{ |
|
"epoch": 0.15769641922986527, |
|
"grad_norm": 5.933716297149658, |
|
"learning_rate": 1.5770957175785514e-08, |
|
"loss": 2.8724, |
|
"step": 1300 |
|
}, |
|
{ |
|
"epoch": 0.169826913016778, |
|
"grad_norm": 5.96455717086792, |
|
"learning_rate": 1.6984107727769014e-08, |
|
"loss": 2.8724, |
|
"step": 1400 |
|
}, |
|
{ |
|
"epoch": 0.1819574068036907, |
|
"grad_norm": 5.977672100067139, |
|
"learning_rate": 1.8197258279752517e-08, |
|
"loss": 2.8724, |
|
"step": 1500 |
|
}, |
|
{ |
|
"epoch": 0.1940879005906034, |
|
"grad_norm": 6.015458106994629, |
|
"learning_rate": 1.9410408831736016e-08, |
|
"loss": 2.8724, |
|
"step": 1600 |
|
}, |
|
{ |
|
"epoch": 0.20621839437751613, |
|
"grad_norm": 6.05010986328125, |
|
"learning_rate": 2.062355938371952e-08, |
|
"loss": 2.8724, |
|
"step": 1700 |
|
}, |
|
{ |
|
"epoch": 0.21834888816442885, |
|
"grad_norm": 6.104752540588379, |
|
"learning_rate": 2.1836709935703022e-08, |
|
"loss": 2.8724, |
|
"step": 1800 |
|
}, |
|
{ |
|
"epoch": 0.23047938195134154, |
|
"grad_norm": 6.165137767791748, |
|
"learning_rate": 2.304986048768652e-08, |
|
"loss": 2.8724, |
|
"step": 1900 |
|
}, |
|
{ |
|
"epoch": 0.24260987573825427, |
|
"grad_norm": 6.208470821380615, |
|
"learning_rate": 2.4263011039670024e-08, |
|
"loss": 2.8724, |
|
"step": 2000 |
|
}, |
|
{ |
|
"epoch": 0.254740369525167, |
|
"grad_norm": 6.256989002227783, |
|
"learning_rate": 2.5476161591653524e-08, |
|
"loss": 2.8724, |
|
"step": 2100 |
|
}, |
|
{ |
|
"epoch": 0.2668708633120797, |
|
"grad_norm": 6.313167572021484, |
|
"learning_rate": 2.668931214363702e-08, |
|
"loss": 2.8724, |
|
"step": 2200 |
|
}, |
|
{ |
|
"epoch": 0.27900135709899243, |
|
"grad_norm": 6.35524845123291, |
|
"learning_rate": 2.7902462695620526e-08, |
|
"loss": 2.8723, |
|
"step": 2300 |
|
}, |
|
{ |
|
"epoch": 0.2911318508859051, |
|
"grad_norm": 6.443501949310303, |
|
"learning_rate": 2.9115613247604026e-08, |
|
"loss": 2.8714, |
|
"step": 2400 |
|
}, |
|
{ |
|
"epoch": 0.3032623446728178, |
|
"grad_norm": 6.514742374420166, |
|
"learning_rate": 3.032876379958753e-08, |
|
"loss": 2.8727, |
|
"step": 2500 |
|
}, |
|
{ |
|
"epoch": 0.31539283845973054, |
|
"grad_norm": 6.6397552490234375, |
|
"learning_rate": 3.154191435157103e-08, |
|
"loss": 2.8723, |
|
"step": 2600 |
|
}, |
|
{ |
|
"epoch": 0.32752333224664326, |
|
"grad_norm": 6.5825324058532715, |
|
"learning_rate": 3.275506490355453e-08, |
|
"loss": 2.8718, |
|
"step": 2700 |
|
}, |
|
{ |
|
"epoch": 0.339653826033556, |
|
"grad_norm": 6.749302387237549, |
|
"learning_rate": 3.396821545553803e-08, |
|
"loss": 2.8703, |
|
"step": 2800 |
|
}, |
|
{ |
|
"epoch": 0.3517843198204687, |
|
"grad_norm": 6.816134929656982, |
|
"learning_rate": 3.518136600752153e-08, |
|
"loss": 2.8613, |
|
"step": 2900 |
|
}, |
|
{ |
|
"epoch": 0.3639148136073814, |
|
"grad_norm": 7.144017219543457, |
|
"learning_rate": 3.639451655950503e-08, |
|
"loss": 2.8315, |
|
"step": 3000 |
|
}, |
|
{ |
|
"epoch": 0.3760453073942941, |
|
"grad_norm": 8.347600936889648, |
|
"learning_rate": 3.760766711148853e-08, |
|
"loss": 2.7179, |
|
"step": 3100 |
|
}, |
|
{ |
|
"epoch": 0.3881758011812068, |
|
"grad_norm": 8.95757007598877, |
|
"learning_rate": 3.882081766347203e-08, |
|
"loss": 2.3659, |
|
"step": 3200 |
|
}, |
|
{ |
|
"epoch": 0.40030629496811954, |
|
"grad_norm": 8.870607376098633, |
|
"learning_rate": 4.003396821545554e-08, |
|
"loss": 2.054, |
|
"step": 3300 |
|
}, |
|
{ |
|
"epoch": 0.41243678875503226, |
|
"grad_norm": 8.5811767578125, |
|
"learning_rate": 4.124711876743904e-08, |
|
"loss": 1.9066, |
|
"step": 3400 |
|
}, |
|
{ |
|
"epoch": 0.424567282541945, |
|
"grad_norm": 8.555498123168945, |
|
"learning_rate": 4.246026931942254e-08, |
|
"loss": 1.7966, |
|
"step": 3500 |
|
}, |
|
{ |
|
"epoch": 0.4366977763288577, |
|
"grad_norm": 8.071405410766602, |
|
"learning_rate": 4.3673419871406044e-08, |
|
"loss": 1.7012, |
|
"step": 3600 |
|
}, |
|
{ |
|
"epoch": 0.4488282701157704, |
|
"grad_norm": 7.7888875007629395, |
|
"learning_rate": 4.488657042338954e-08, |
|
"loss": 1.6079, |
|
"step": 3700 |
|
}, |
|
{ |
|
"epoch": 0.4609587639026831, |
|
"grad_norm": 7.70664119720459, |
|
"learning_rate": 4.609972097537304e-08, |
|
"loss": 1.5164, |
|
"step": 3800 |
|
}, |
|
{ |
|
"epoch": 0.4730892576895958, |
|
"grad_norm": 7.393267631530762, |
|
"learning_rate": 4.731287152735654e-08, |
|
"loss": 1.4265, |
|
"step": 3900 |
|
}, |
|
{ |
|
"epoch": 0.48521975147650853, |
|
"grad_norm": 6.967721939086914, |
|
"learning_rate": 4.852602207934005e-08, |
|
"loss": 1.3367, |
|
"step": 4000 |
|
}, |
|
{ |
|
"epoch": 0.49735024526342125, |
|
"grad_norm": 6.281134605407715, |
|
"learning_rate": 4.973917263132355e-08, |
|
"loss": 1.2483, |
|
"step": 4100 |
|
}, |
|
{ |
|
"epoch": 0.509480739050334, |
|
"grad_norm": 5.565547943115234, |
|
"learning_rate": 5.095232318330705e-08, |
|
"loss": 1.1588, |
|
"step": 4200 |
|
}, |
|
{ |
|
"epoch": 0.5216112328372466, |
|
"grad_norm": 4.586236953735352, |
|
"learning_rate": 5.216547373529054e-08, |
|
"loss": 1.0804, |
|
"step": 4300 |
|
}, |
|
{ |
|
"epoch": 0.5337417266241594, |
|
"grad_norm": 3.6855998039245605, |
|
"learning_rate": 5.337862428727404e-08, |
|
"loss": 1.013, |
|
"step": 4400 |
|
}, |
|
{ |
|
"epoch": 0.5458722204110721, |
|
"grad_norm": 3.2828409671783447, |
|
"learning_rate": 5.459177483925755e-08, |
|
"loss": 0.9534, |
|
"step": 4500 |
|
}, |
|
{ |
|
"epoch": 0.5580027141979849, |
|
"grad_norm": 3.190077066421509, |
|
"learning_rate": 5.580492539124105e-08, |
|
"loss": 0.8995, |
|
"step": 4600 |
|
}, |
|
{ |
|
"epoch": 0.5701332079848975, |
|
"grad_norm": 3.253711700439453, |
|
"learning_rate": 5.701807594322455e-08, |
|
"loss": 0.8491, |
|
"step": 4700 |
|
}, |
|
{ |
|
"epoch": 0.5822637017718102, |
|
"grad_norm": 3.392833948135376, |
|
"learning_rate": 5.823122649520805e-08, |
|
"loss": 0.7953, |
|
"step": 4800 |
|
}, |
|
{ |
|
"epoch": 0.594394195558723, |
|
"grad_norm": 3.544286012649536, |
|
"learning_rate": 5.944437704719155e-08, |
|
"loss": 0.7384, |
|
"step": 4900 |
|
}, |
|
{ |
|
"epoch": 0.6065246893456356, |
|
"grad_norm": 3.427243947982788, |
|
"learning_rate": 6.065752759917506e-08, |
|
"loss": 0.6794, |
|
"step": 5000 |
|
}, |
|
{ |
|
"epoch": 0.6186551831325484, |
|
"grad_norm": 2.903007745742798, |
|
"learning_rate": 6.187067815115856e-08, |
|
"loss": 0.6207, |
|
"step": 5100 |
|
}, |
|
{ |
|
"epoch": 0.6307856769194611, |
|
"grad_norm": 2.3621325492858887, |
|
"learning_rate": 6.308382870314206e-08, |
|
"loss": 0.5738, |
|
"step": 5200 |
|
}, |
|
{ |
|
"epoch": 0.6429161707063739, |
|
"grad_norm": 2.0431277751922607, |
|
"learning_rate": 6.429697925512556e-08, |
|
"loss": 0.5392, |
|
"step": 5300 |
|
}, |
|
{ |
|
"epoch": 0.6550466644932865, |
|
"grad_norm": 1.8731169700622559, |
|
"learning_rate": 6.551012980710906e-08, |
|
"loss": 0.513, |
|
"step": 5400 |
|
}, |
|
{ |
|
"epoch": 0.6671771582801992, |
|
"grad_norm": 1.7845033407211304, |
|
"learning_rate": 6.672328035909256e-08, |
|
"loss": 0.4931, |
|
"step": 5500 |
|
}, |
|
{ |
|
"epoch": 0.679307652067112, |
|
"grad_norm": 1.7125587463378906, |
|
"learning_rate": 6.793643091107605e-08, |
|
"loss": 0.4783, |
|
"step": 5600 |
|
}, |
|
{ |
|
"epoch": 0.6914381458540246, |
|
"grad_norm": 1.687855839729309, |
|
"learning_rate": 6.914958146305955e-08, |
|
"loss": 0.4678, |
|
"step": 5700 |
|
}, |
|
{ |
|
"epoch": 0.7035686396409374, |
|
"grad_norm": 1.6900482177734375, |
|
"learning_rate": 7.036273201504305e-08, |
|
"loss": 0.4585, |
|
"step": 5800 |
|
}, |
|
{ |
|
"epoch": 0.7156991334278501, |
|
"grad_norm": 1.7122031450271606, |
|
"learning_rate": 7.157588256702657e-08, |
|
"loss": 0.4531, |
|
"step": 5900 |
|
}, |
|
{ |
|
"epoch": 0.7278296272147629, |
|
"grad_norm": 1.7423112392425537, |
|
"learning_rate": 7.278903311901007e-08, |
|
"loss": 0.446, |
|
"step": 6000 |
|
}, |
|
{ |
|
"epoch": 0.7399601210016755, |
|
"grad_norm": 1.8058453798294067, |
|
"learning_rate": 7.400218367099357e-08, |
|
"loss": 0.4414, |
|
"step": 6100 |
|
}, |
|
{ |
|
"epoch": 0.7520906147885882, |
|
"grad_norm": 1.8512169122695923, |
|
"learning_rate": 7.521533422297707e-08, |
|
"loss": 0.4368, |
|
"step": 6200 |
|
}, |
|
{ |
|
"epoch": 0.764221108575501, |
|
"grad_norm": 1.8963505029678345, |
|
"learning_rate": 7.642848477496056e-08, |
|
"loss": 0.4328, |
|
"step": 6300 |
|
}, |
|
{ |
|
"epoch": 0.7763516023624136, |
|
"grad_norm": 1.947475552558899, |
|
"learning_rate": 7.764163532694406e-08, |
|
"loss": 0.4273, |
|
"step": 6400 |
|
}, |
|
{ |
|
"epoch": 0.7884820961493264, |
|
"grad_norm": 1.9949339628219604, |
|
"learning_rate": 7.885478587892758e-08, |
|
"loss": 0.4225, |
|
"step": 6500 |
|
}, |
|
{ |
|
"epoch": 0.8006125899362391, |
|
"grad_norm": 2.0207386016845703, |
|
"learning_rate": 8.006793643091108e-08, |
|
"loss": 0.4172, |
|
"step": 6600 |
|
}, |
|
{ |
|
"epoch": 0.8127430837231518, |
|
"grad_norm": 2.0674002170562744, |
|
"learning_rate": 8.128108698289458e-08, |
|
"loss": 0.4126, |
|
"step": 6700 |
|
}, |
|
{ |
|
"epoch": 0.8248735775100645, |
|
"grad_norm": 2.079556703567505, |
|
"learning_rate": 8.249423753487808e-08, |
|
"loss": 0.4069, |
|
"step": 6800 |
|
}, |
|
{ |
|
"epoch": 0.8370040712969772, |
|
"grad_norm": 2.115440607070923, |
|
"learning_rate": 8.370738808686158e-08, |
|
"loss": 0.4021, |
|
"step": 6900 |
|
}, |
|
{ |
|
"epoch": 0.84913456508389, |
|
"grad_norm": 2.1741063594818115, |
|
"learning_rate": 8.492053863884507e-08, |
|
"loss": 0.3977, |
|
"step": 7000 |
|
}, |
|
{ |
|
"epoch": 0.8612650588708026, |
|
"grad_norm": 2.1430447101593018, |
|
"learning_rate": 8.613368919082857e-08, |
|
"loss": 0.3929, |
|
"step": 7100 |
|
}, |
|
{ |
|
"epoch": 0.8733955526577154, |
|
"grad_norm": 2.102972984313965, |
|
"learning_rate": 8.734683974281209e-08, |
|
"loss": 0.3872, |
|
"step": 7200 |
|
}, |
|
{ |
|
"epoch": 0.8855260464446281, |
|
"grad_norm": 2.0882880687713623, |
|
"learning_rate": 8.855999029479559e-08, |
|
"loss": 0.3817, |
|
"step": 7300 |
|
}, |
|
{ |
|
"epoch": 0.8976565402315408, |
|
"grad_norm": 1.9396212100982666, |
|
"learning_rate": 8.977314084677909e-08, |
|
"loss": 0.3772, |
|
"step": 7400 |
|
}, |
|
{ |
|
"epoch": 0.9097870340184535, |
|
"grad_norm": 1.8905789852142334, |
|
"learning_rate": 9.098629139876259e-08, |
|
"loss": 0.3714, |
|
"step": 7500 |
|
}, |
|
{ |
|
"epoch": 0.9219175278053662, |
|
"grad_norm": 1.8100907802581787, |
|
"learning_rate": 9.219944195074609e-08, |
|
"loss": 0.3673, |
|
"step": 7600 |
|
}, |
|
{ |
|
"epoch": 0.934048021592279, |
|
"grad_norm": 1.6033921241760254, |
|
"learning_rate": 9.341259250272959e-08, |
|
"loss": 0.3621, |
|
"step": 7700 |
|
}, |
|
{ |
|
"epoch": 0.9461785153791916, |
|
"grad_norm": 1.5708930492401123, |
|
"learning_rate": 9.462574305471308e-08, |
|
"loss": 0.3572, |
|
"step": 7800 |
|
}, |
|
{ |
|
"epoch": 0.9583090091661044, |
|
"grad_norm": 1.5194717645645142, |
|
"learning_rate": 9.58388936066966e-08, |
|
"loss": 0.3529, |
|
"step": 7900 |
|
}, |
|
{ |
|
"epoch": 0.9704395029530171, |
|
"grad_norm": 1.5393712520599365, |
|
"learning_rate": 9.70520441586801e-08, |
|
"loss": 0.3479, |
|
"step": 8000 |
|
}, |
|
{ |
|
"epoch": 0.9825699967399298, |
|
"grad_norm": 1.4730511903762817, |
|
"learning_rate": 9.82651947106636e-08, |
|
"loss": 0.3436, |
|
"step": 8100 |
|
}, |
|
{ |
|
"epoch": 0.9947004905268425, |
|
"grad_norm": 1.4694753885269165, |
|
"learning_rate": 9.94783452626471e-08, |
|
"loss": 0.3392, |
|
"step": 8200 |
|
}, |
|
{ |
|
"epoch": 1.0068309843137553, |
|
"grad_norm": 1.4455537796020508, |
|
"learning_rate": 9.992451507676548e-08, |
|
"loss": 0.3344, |
|
"step": 8300 |
|
}, |
|
{ |
|
"epoch": 1.018961478100668, |
|
"grad_norm": 1.4519261121749878, |
|
"learning_rate": 9.978972057098953e-08, |
|
"loss": 0.3293, |
|
"step": 8400 |
|
}, |
|
{ |
|
"epoch": 1.0310919718875806, |
|
"grad_norm": 1.4432621002197266, |
|
"learning_rate": 9.965492606521357e-08, |
|
"loss": 0.3248, |
|
"step": 8500 |
|
}, |
|
{ |
|
"epoch": 1.0432224656744933, |
|
"grad_norm": 1.414642095565796, |
|
"learning_rate": 9.952013155943763e-08, |
|
"loss": 0.3201, |
|
"step": 8600 |
|
}, |
|
{ |
|
"epoch": 1.0553529594614062, |
|
"grad_norm": 1.382421851158142, |
|
"learning_rate": 9.938533705366169e-08, |
|
"loss": 0.3154, |
|
"step": 8700 |
|
}, |
|
{ |
|
"epoch": 1.0674834532483188, |
|
"grad_norm": 1.314692497253418, |
|
"learning_rate": 9.925054254788574e-08, |
|
"loss": 0.3107, |
|
"step": 8800 |
|
}, |
|
{ |
|
"epoch": 1.0796139470352315, |
|
"grad_norm": 1.253720998764038, |
|
"learning_rate": 9.91157480421098e-08, |
|
"loss": 0.3064, |
|
"step": 8900 |
|
}, |
|
{ |
|
"epoch": 1.0917444408221442, |
|
"grad_norm": 1.1932885646820068, |
|
"learning_rate": 9.898095353633386e-08, |
|
"loss": 0.3021, |
|
"step": 9000 |
|
}, |
|
{ |
|
"epoch": 1.1038749346090568, |
|
"grad_norm": 1.1070135831832886, |
|
"learning_rate": 9.884615903055791e-08, |
|
"loss": 0.2975, |
|
"step": 9100 |
|
}, |
|
{ |
|
"epoch": 1.1160054283959697, |
|
"grad_norm": 1.03802490234375, |
|
"learning_rate": 9.871136452478198e-08, |
|
"loss": 0.2931, |
|
"step": 9200 |
|
}, |
|
{ |
|
"epoch": 1.1281359221828824, |
|
"grad_norm": 1.0262868404388428, |
|
"learning_rate": 9.857657001900603e-08, |
|
"loss": 0.2892, |
|
"step": 9300 |
|
}, |
|
{ |
|
"epoch": 1.140266415969795, |
|
"grad_norm": 0.973850667476654, |
|
"learning_rate": 9.844177551323008e-08, |
|
"loss": 0.2849, |
|
"step": 9400 |
|
}, |
|
{ |
|
"epoch": 1.1523969097567077, |
|
"grad_norm": 0.9496479034423828, |
|
"learning_rate": 9.830698100745413e-08, |
|
"loss": 0.281, |
|
"step": 9500 |
|
}, |
|
{ |
|
"epoch": 1.1645274035436204, |
|
"grad_norm": 0.9395837187767029, |
|
"learning_rate": 9.817218650167819e-08, |
|
"loss": 0.2772, |
|
"step": 9600 |
|
}, |
|
{ |
|
"epoch": 1.1766578973305333, |
|
"grad_norm": 0.9417792558670044, |
|
"learning_rate": 9.803739199590224e-08, |
|
"loss": 0.2731, |
|
"step": 9700 |
|
}, |
|
{ |
|
"epoch": 1.188788391117446, |
|
"grad_norm": 0.9536375999450684, |
|
"learning_rate": 9.790259749012629e-08, |
|
"loss": 0.2691, |
|
"step": 9800 |
|
}, |
|
{ |
|
"epoch": 1.2009188849043586, |
|
"grad_norm": 0.9853664040565491, |
|
"learning_rate": 9.776780298435036e-08, |
|
"loss": 0.2652, |
|
"step": 9900 |
|
}, |
|
{ |
|
"epoch": 1.2130493786912713, |
|
"grad_norm": 0.9936960339546204, |
|
"learning_rate": 9.763300847857441e-08, |
|
"loss": 0.2611, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.2130493786912713, |
|
"eval_loss": 0.25932416319847107, |
|
"eval_runtime": 12876.8542, |
|
"eval_samples_per_second": 32.778, |
|
"eval_steps_per_second": 4.097, |
|
"step": 10000 |
|
}, |
|
{ |
|
"epoch": 1.2251798724781842, |
|
"grad_norm": 1.0140084028244019, |
|
"learning_rate": 9.749821397279846e-08, |
|
"loss": 0.2568, |
|
"step": 10100 |
|
}, |
|
{ |
|
"epoch": 1.2373103662650968, |
|
"grad_norm": 1.0273332595825195, |
|
"learning_rate": 9.736341946702253e-08, |
|
"loss": 0.2524, |
|
"step": 10200 |
|
}, |
|
{ |
|
"epoch": 1.2494408600520095, |
|
"grad_norm": 1.0327990055084229, |
|
"learning_rate": 9.722862496124658e-08, |
|
"loss": 0.2478, |
|
"step": 10300 |
|
}, |
|
{ |
|
"epoch": 1.2615713538389222, |
|
"grad_norm": 1.0590392351150513, |
|
"learning_rate": 9.709383045547062e-08, |
|
"loss": 0.243, |
|
"step": 10400 |
|
}, |
|
{ |
|
"epoch": 1.2737018476258348, |
|
"grad_norm": 1.0778623819351196, |
|
"learning_rate": 9.695903594969469e-08, |
|
"loss": 0.2381, |
|
"step": 10500 |
|
}, |
|
{ |
|
"epoch": 1.2858323414127477, |
|
"grad_norm": 1.104440450668335, |
|
"learning_rate": 9.682424144391874e-08, |
|
"loss": 0.2332, |
|
"step": 10600 |
|
}, |
|
{ |
|
"epoch": 1.2979628351996604, |
|
"grad_norm": 1.1143620014190674, |
|
"learning_rate": 9.668944693814279e-08, |
|
"loss": 0.228, |
|
"step": 10700 |
|
}, |
|
{ |
|
"epoch": 1.310093328986573, |
|
"grad_norm": 1.1427433490753174, |
|
"learning_rate": 9.655465243236686e-08, |
|
"loss": 0.2227, |
|
"step": 10800 |
|
}, |
|
{ |
|
"epoch": 1.3222238227734857, |
|
"grad_norm": 1.145898461341858, |
|
"learning_rate": 9.641985792659091e-08, |
|
"loss": 0.2173, |
|
"step": 10900 |
|
}, |
|
{ |
|
"epoch": 1.3343543165603986, |
|
"grad_norm": 1.178074598312378, |
|
"learning_rate": 9.628506342081496e-08, |
|
"loss": 0.2115, |
|
"step": 11000 |
|
}, |
|
{ |
|
"epoch": 1.3464848103473113, |
|
"grad_norm": 1.174429178237915, |
|
"learning_rate": 9.615026891503903e-08, |
|
"loss": 0.2058, |
|
"step": 11100 |
|
}, |
|
{ |
|
"epoch": 1.358615304134224, |
|
"grad_norm": 1.1728930473327637, |
|
"learning_rate": 9.601547440926308e-08, |
|
"loss": 0.1998, |
|
"step": 11200 |
|
}, |
|
{ |
|
"epoch": 1.3707457979211366, |
|
"grad_norm": 1.1560231447219849, |
|
"learning_rate": 9.588067990348712e-08, |
|
"loss": 0.1936, |
|
"step": 11300 |
|
}, |
|
{ |
|
"epoch": 1.3828762917080493, |
|
"grad_norm": 1.1736541986465454, |
|
"learning_rate": 9.574588539771119e-08, |
|
"loss": 0.1875, |
|
"step": 11400 |
|
}, |
|
{ |
|
"epoch": 1.395006785494962, |
|
"grad_norm": 1.231817603111267, |
|
"learning_rate": 9.561109089193524e-08, |
|
"loss": 0.1814, |
|
"step": 11500 |
|
}, |
|
{ |
|
"epoch": 1.4071372792818748, |
|
"grad_norm": 1.5932906866073608, |
|
"learning_rate": 9.547629638615929e-08, |
|
"loss": 0.1748, |
|
"step": 11600 |
|
}, |
|
{ |
|
"epoch": 1.4192677730687875, |
|
"grad_norm": 2.940645456314087, |
|
"learning_rate": 9.534150188038334e-08, |
|
"loss": 0.1673, |
|
"step": 11700 |
|
}, |
|
{ |
|
"epoch": 1.4313982668557002, |
|
"grad_norm": 2.8523619174957275, |
|
"learning_rate": 9.520805531966517e-08, |
|
"loss": 0.1566, |
|
"step": 11800 |
|
}, |
|
{ |
|
"epoch": 1.443528760642613, |
|
"grad_norm": 0.9756022691726685, |
|
"learning_rate": 9.507326081388922e-08, |
|
"loss": 0.1509, |
|
"step": 11900 |
|
}, |
|
{ |
|
"epoch": 1.4556592544295257, |
|
"grad_norm": 1.0255942344665527, |
|
"learning_rate": 9.493846630811327e-08, |
|
"loss": 0.1467, |
|
"step": 12000 |
|
}, |
|
{ |
|
"epoch": 1.4677897482164384, |
|
"grad_norm": 2.2742061614990234, |
|
"learning_rate": 9.480367180233734e-08, |
|
"loss": 0.143, |
|
"step": 12100 |
|
}, |
|
{ |
|
"epoch": 1.479920242003351, |
|
"grad_norm": 2.624950408935547, |
|
"learning_rate": 9.466887729656139e-08, |
|
"loss": 0.14, |
|
"step": 12200 |
|
}, |
|
{ |
|
"epoch": 1.4920507357902637, |
|
"grad_norm": 5.254027366638184, |
|
"learning_rate": 9.453408279078544e-08, |
|
"loss": 0.1371, |
|
"step": 12300 |
|
}, |
|
{ |
|
"epoch": 1.5041812295771764, |
|
"grad_norm": 2.4900190830230713, |
|
"learning_rate": 9.439928828500951e-08, |
|
"loss": 0.1342, |
|
"step": 12400 |
|
}, |
|
{ |
|
"epoch": 1.516311723364089, |
|
"grad_norm": 1.6087634563446045, |
|
"learning_rate": 9.426449377923355e-08, |
|
"loss": 0.1315, |
|
"step": 12500 |
|
}, |
|
{ |
|
"epoch": 1.528442217151002, |
|
"grad_norm": 2.786614179611206, |
|
"learning_rate": 9.41296992734576e-08, |
|
"loss": 0.1289, |
|
"step": 12600 |
|
}, |
|
{ |
|
"epoch": 1.5405727109379146, |
|
"grad_norm": 5.523770809173584, |
|
"learning_rate": 9.399490476768165e-08, |
|
"loss": 0.1269, |
|
"step": 12700 |
|
}, |
|
{ |
|
"epoch": 1.5527032047248275, |
|
"grad_norm": 7.25140380859375, |
|
"learning_rate": 9.386011026190572e-08, |
|
"loss": 0.1243, |
|
"step": 12800 |
|
}, |
|
{ |
|
"epoch": 1.5648336985117401, |
|
"grad_norm": 1.2261604070663452, |
|
"learning_rate": 9.372531575612977e-08, |
|
"loss": 0.1213, |
|
"step": 12900 |
|
}, |
|
{ |
|
"epoch": 1.5769641922986528, |
|
"grad_norm": 1.0399848222732544, |
|
"learning_rate": 9.359052125035383e-08, |
|
"loss": 0.1176, |
|
"step": 13000 |
|
}, |
|
{ |
|
"epoch": 1.5890946860855655, |
|
"grad_norm": 1.0358467102050781, |
|
"learning_rate": 9.345572674457789e-08, |
|
"loss": 0.1138, |
|
"step": 13100 |
|
}, |
|
{ |
|
"epoch": 1.6012251798724781, |
|
"grad_norm": 1.7050210237503052, |
|
"learning_rate": 9.332093223880194e-08, |
|
"loss": 0.1105, |
|
"step": 13200 |
|
}, |
|
{ |
|
"epoch": 1.6133556736593908, |
|
"grad_norm": 6.696021556854248, |
|
"learning_rate": 9.3186137733026e-08, |
|
"loss": 0.1078, |
|
"step": 13300 |
|
}, |
|
{ |
|
"epoch": 1.6254861674463035, |
|
"grad_norm": 1.208287239074707, |
|
"learning_rate": 9.305134322725005e-08, |
|
"loss": 0.1037, |
|
"step": 13400 |
|
}, |
|
{ |
|
"epoch": 1.6376166612332164, |
|
"grad_norm": 1.0631296634674072, |
|
"learning_rate": 9.29165487214741e-08, |
|
"loss": 0.0993, |
|
"step": 13500 |
|
}, |
|
{ |
|
"epoch": 1.649747155020129, |
|
"grad_norm": 1.4672938585281372, |
|
"learning_rate": 9.278175421569816e-08, |
|
"loss": 0.0952, |
|
"step": 13600 |
|
}, |
|
{ |
|
"epoch": 1.6618776488070417, |
|
"grad_norm": 1.4060345888137817, |
|
"learning_rate": 9.264695970992222e-08, |
|
"loss": 0.0913, |
|
"step": 13700 |
|
}, |
|
{ |
|
"epoch": 1.6740081425939546, |
|
"grad_norm": 7.0771026611328125, |
|
"learning_rate": 9.251351314920403e-08, |
|
"loss": 0.0881, |
|
"step": 13800 |
|
}, |
|
{ |
|
"epoch": 1.6861386363808673, |
|
"grad_norm": 3.3149595260620117, |
|
"learning_rate": 9.237871864342808e-08, |
|
"loss": 0.0855, |
|
"step": 13900 |
|
}, |
|
{ |
|
"epoch": 1.69826913016778, |
|
"grad_norm": 2.399245023727417, |
|
"learning_rate": 9.224392413765214e-08, |
|
"loss": 0.083, |
|
"step": 14000 |
|
}, |
|
{ |
|
"epoch": 1.7103996239546926, |
|
"grad_norm": 5.708673477172852, |
|
"learning_rate": 9.21091296318762e-08, |
|
"loss": 0.0811, |
|
"step": 14100 |
|
}, |
|
{ |
|
"epoch": 1.7225301177416052, |
|
"grad_norm": 2.2118566036224365, |
|
"learning_rate": 9.197433512610025e-08, |
|
"loss": 0.0788, |
|
"step": 14200 |
|
}, |
|
{ |
|
"epoch": 1.734660611528518, |
|
"grad_norm": 1.6321247816085815, |
|
"learning_rate": 9.183954062032431e-08, |
|
"loss": 0.0762, |
|
"step": 14300 |
|
}, |
|
{ |
|
"epoch": 1.7467911053154306, |
|
"grad_norm": 3.3088338375091553, |
|
"learning_rate": 9.170474611454837e-08, |
|
"loss": 0.0739, |
|
"step": 14400 |
|
}, |
|
{ |
|
"epoch": 1.7589215991023435, |
|
"grad_norm": 1.9590739011764526, |
|
"learning_rate": 9.156995160877243e-08, |
|
"loss": 0.071, |
|
"step": 14500 |
|
}, |
|
{ |
|
"epoch": 1.7710520928892561, |
|
"grad_norm": 2.8815441131591797, |
|
"learning_rate": 9.143515710299648e-08, |
|
"loss": 0.068, |
|
"step": 14600 |
|
}, |
|
{ |
|
"epoch": 1.783182586676169, |
|
"grad_norm": 2.3235983848571777, |
|
"learning_rate": 9.130036259722053e-08, |
|
"loss": 0.065, |
|
"step": 14700 |
|
}, |
|
{ |
|
"epoch": 1.7953130804630817, |
|
"grad_norm": 2.7600650787353516, |
|
"learning_rate": 9.116556809144458e-08, |
|
"loss": 0.0624, |
|
"step": 14800 |
|
}, |
|
{ |
|
"epoch": 1.8074435742499944, |
|
"grad_norm": 2.3273894786834717, |
|
"learning_rate": 9.103077358566864e-08, |
|
"loss": 0.0601, |
|
"step": 14900 |
|
}, |
|
{ |
|
"epoch": 1.819574068036907, |
|
"grad_norm": 5.992413520812988, |
|
"learning_rate": 9.08959790798927e-08, |
|
"loss": 0.0575, |
|
"step": 15000 |
|
}, |
|
{ |
|
"epoch": 1.8317045618238197, |
|
"grad_norm": 1.347684621810913, |
|
"learning_rate": 9.076118457411675e-08, |
|
"loss": 0.0542, |
|
"step": 15100 |
|
}, |
|
{ |
|
"epoch": 1.8438350556107324, |
|
"grad_norm": 0.8873547315597534, |
|
"learning_rate": 9.062639006834081e-08, |
|
"loss": 0.0503, |
|
"step": 15200 |
|
}, |
|
{ |
|
"epoch": 1.855965549397645, |
|
"grad_norm": 6.997786521911621, |
|
"learning_rate": 9.049159556256487e-08, |
|
"loss": 0.0468, |
|
"step": 15300 |
|
}, |
|
{ |
|
"epoch": 1.868096043184558, |
|
"grad_norm": 6.812012195587158, |
|
"learning_rate": 9.035680105678893e-08, |
|
"loss": 0.0447, |
|
"step": 15400 |
|
}, |
|
{ |
|
"epoch": 1.8802265369714706, |
|
"grad_norm": 6.6378350257873535, |
|
"learning_rate": 9.022200655101298e-08, |
|
"loss": 0.043, |
|
"step": 15500 |
|
}, |
|
{ |
|
"epoch": 1.8923570307583835, |
|
"grad_norm": 2.3120977878570557, |
|
"learning_rate": 9.008721204523703e-08, |
|
"loss": 0.0411, |
|
"step": 15600 |
|
}, |
|
{ |
|
"epoch": 1.9044875245452961, |
|
"grad_norm": 3.212885618209839, |
|
"learning_rate": 8.995241753946108e-08, |
|
"loss": 0.0394, |
|
"step": 15700 |
|
}, |
|
{ |
|
"epoch": 1.9166180183322088, |
|
"grad_norm": 2.6104061603546143, |
|
"learning_rate": 8.981762303368514e-08, |
|
"loss": 0.0379, |
|
"step": 15800 |
|
}, |
|
{ |
|
"epoch": 1.9287485121191215, |
|
"grad_norm": 1.1504628658294678, |
|
"learning_rate": 8.968282852790919e-08, |
|
"loss": 0.0357, |
|
"step": 15900 |
|
}, |
|
{ |
|
"epoch": 1.9408790059060341, |
|
"grad_norm": 0.8576985597610474, |
|
"learning_rate": 8.954938196719101e-08, |
|
"loss": 0.0334, |
|
"step": 16000 |
|
}, |
|
{ |
|
"epoch": 1.9530094996929468, |
|
"grad_norm": 3.4249160289764404, |
|
"learning_rate": 8.941458746141506e-08, |
|
"loss": 0.0309, |
|
"step": 16100 |
|
}, |
|
{ |
|
"epoch": 1.9651399934798595, |
|
"grad_norm": 3.397020101547241, |
|
"learning_rate": 8.927979295563912e-08, |
|
"loss": 0.0285, |
|
"step": 16200 |
|
}, |
|
{ |
|
"epoch": 1.9772704872667723, |
|
"grad_norm": 0.8086531162261963, |
|
"learning_rate": 8.914499844986318e-08, |
|
"loss": 0.0265, |
|
"step": 16300 |
|
}, |
|
{ |
|
"epoch": 1.989400981053685, |
|
"grad_norm": 0.8438703417778015, |
|
"learning_rate": 8.901020394408724e-08, |
|
"loss": 0.0245, |
|
"step": 16400 |
|
}, |
|
{ |
|
"epoch": 2.001531474840598, |
|
"grad_norm": 0.8428291082382202, |
|
"learning_rate": 8.887540943831129e-08, |
|
"loss": 0.0227, |
|
"step": 16500 |
|
}, |
|
{ |
|
"epoch": 2.0136619686275106, |
|
"grad_norm": 1.706946611404419, |
|
"learning_rate": 8.874061493253534e-08, |
|
"loss": 0.0212, |
|
"step": 16600 |
|
}, |
|
{ |
|
"epoch": 2.0257924624144232, |
|
"grad_norm": 1.5371583700180054, |
|
"learning_rate": 8.860582042675941e-08, |
|
"loss": 0.02, |
|
"step": 16700 |
|
}, |
|
{ |
|
"epoch": 2.037922956201336, |
|
"grad_norm": 1.4825010299682617, |
|
"learning_rate": 8.847102592098346e-08, |
|
"loss": 0.0188, |
|
"step": 16800 |
|
}, |
|
{ |
|
"epoch": 2.0500534499882486, |
|
"grad_norm": 0.9448016285896301, |
|
"learning_rate": 8.833623141520751e-08, |
|
"loss": 0.0176, |
|
"step": 16900 |
|
}, |
|
{ |
|
"epoch": 2.0621839437751612, |
|
"grad_norm": 1.871368169784546, |
|
"learning_rate": 8.820143690943157e-08, |
|
"loss": 0.0163, |
|
"step": 17000 |
|
}, |
|
{ |
|
"epoch": 2.074314437562074, |
|
"grad_norm": 0.7671025991439819, |
|
"learning_rate": 8.806664240365562e-08, |
|
"loss": 0.0153, |
|
"step": 17100 |
|
}, |
|
{ |
|
"epoch": 2.0864449313489866, |
|
"grad_norm": 0.6203155517578125, |
|
"learning_rate": 8.793184789787967e-08, |
|
"loss": 0.0142, |
|
"step": 17200 |
|
}, |
|
{ |
|
"epoch": 2.0985754251358992, |
|
"grad_norm": 0.5058071613311768, |
|
"learning_rate": 8.779705339210374e-08, |
|
"loss": 0.0132, |
|
"step": 17300 |
|
}, |
|
{ |
|
"epoch": 2.1107059189228123, |
|
"grad_norm": 1.8204731941223145, |
|
"learning_rate": 8.766225888632779e-08, |
|
"loss": 0.0122, |
|
"step": 17400 |
|
}, |
|
{ |
|
"epoch": 2.122836412709725, |
|
"grad_norm": 0.3997783660888672, |
|
"learning_rate": 8.752746438055184e-08, |
|
"loss": 0.0114, |
|
"step": 17500 |
|
}, |
|
{ |
|
"epoch": 2.1349669064966377, |
|
"grad_norm": 0.304776668548584, |
|
"learning_rate": 8.739266987477591e-08, |
|
"loss": 0.0106, |
|
"step": 17600 |
|
}, |
|
{ |
|
"epoch": 2.1470974002835503, |
|
"grad_norm": 0.31530994176864624, |
|
"learning_rate": 8.725787536899996e-08, |
|
"loss": 0.0098, |
|
"step": 17700 |
|
}, |
|
{ |
|
"epoch": 2.159227894070463, |
|
"grad_norm": 0.3956185579299927, |
|
"learning_rate": 8.712308086322401e-08, |
|
"loss": 0.0091, |
|
"step": 17800 |
|
}, |
|
{ |
|
"epoch": 2.1713583878573757, |
|
"grad_norm": 1.1947382688522339, |
|
"learning_rate": 8.698828635744807e-08, |
|
"loss": 0.0084, |
|
"step": 17900 |
|
}, |
|
{ |
|
"epoch": 2.1834888816442883, |
|
"grad_norm": 0.33143043518066406, |
|
"learning_rate": 8.685349185167212e-08, |
|
"loss": 0.0078, |
|
"step": 18000 |
|
}, |
|
{ |
|
"epoch": 2.195619375431201, |
|
"grad_norm": 0.46097975969314575, |
|
"learning_rate": 8.671869734589617e-08, |
|
"loss": 0.0073, |
|
"step": 18100 |
|
}, |
|
{ |
|
"epoch": 2.2077498692181137, |
|
"grad_norm": 0.28613144159317017, |
|
"learning_rate": 8.658390284012024e-08, |
|
"loss": 0.0068, |
|
"step": 18200 |
|
}, |
|
{ |
|
"epoch": 2.219880363005027, |
|
"grad_norm": 0.339844286441803, |
|
"learning_rate": 8.644910833434429e-08, |
|
"loss": 0.0063, |
|
"step": 18300 |
|
}, |
|
{ |
|
"epoch": 2.2320108567919394, |
|
"grad_norm": 0.7511897683143616, |
|
"learning_rate": 8.631431382856834e-08, |
|
"loss": 0.0059, |
|
"step": 18400 |
|
}, |
|
{ |
|
"epoch": 2.244141350578852, |
|
"grad_norm": 0.6955689787864685, |
|
"learning_rate": 8.617951932279241e-08, |
|
"loss": 0.0055, |
|
"step": 18500 |
|
}, |
|
{ |
|
"epoch": 2.256271844365765, |
|
"grad_norm": 0.40644150972366333, |
|
"learning_rate": 8.604472481701646e-08, |
|
"loss": 0.0051, |
|
"step": 18600 |
|
}, |
|
{ |
|
"epoch": 2.2684023381526774, |
|
"grad_norm": 0.23919856548309326, |
|
"learning_rate": 8.590993031124051e-08, |
|
"loss": 0.0048, |
|
"step": 18700 |
|
}, |
|
{ |
|
"epoch": 2.28053283193959, |
|
"grad_norm": 0.40758979320526123, |
|
"learning_rate": 8.577513580546457e-08, |
|
"loss": 0.0045, |
|
"step": 18800 |
|
}, |
|
{ |
|
"epoch": 2.2926633257265028, |
|
"grad_norm": 0.3676837384700775, |
|
"learning_rate": 8.564034129968862e-08, |
|
"loss": 0.0043, |
|
"step": 18900 |
|
}, |
|
{ |
|
"epoch": 2.3047938195134154, |
|
"grad_norm": 0.27469906210899353, |
|
"learning_rate": 8.550554679391267e-08, |
|
"loss": 0.004, |
|
"step": 19000 |
|
}, |
|
{ |
|
"epoch": 2.316924313300328, |
|
"grad_norm": 0.4906423091888428, |
|
"learning_rate": 8.537075228813672e-08, |
|
"loss": 0.0037, |
|
"step": 19100 |
|
}, |
|
{ |
|
"epoch": 2.3290548070872408, |
|
"grad_norm": 0.3602069616317749, |
|
"learning_rate": 8.523595778236079e-08, |
|
"loss": 0.0035, |
|
"step": 19200 |
|
}, |
|
{ |
|
"epoch": 2.341185300874154, |
|
"grad_norm": 0.38956815004348755, |
|
"learning_rate": 8.510116327658484e-08, |
|
"loss": 0.0033, |
|
"step": 19300 |
|
}, |
|
{ |
|
"epoch": 2.3533157946610666, |
|
"grad_norm": 0.34021806716918945, |
|
"learning_rate": 8.49663687708089e-08, |
|
"loss": 0.0031, |
|
"step": 19400 |
|
}, |
|
{ |
|
"epoch": 2.365446288447979, |
|
"grad_norm": 0.42637619376182556, |
|
"learning_rate": 8.483157426503296e-08, |
|
"loss": 0.0029, |
|
"step": 19500 |
|
}, |
|
{ |
|
"epoch": 2.377576782234892, |
|
"grad_norm": 0.2764069437980652, |
|
"learning_rate": 8.469677975925701e-08, |
|
"loss": 0.0027, |
|
"step": 19600 |
|
}, |
|
{ |
|
"epoch": 2.3897072760218045, |
|
"grad_norm": 0.14541544020175934, |
|
"learning_rate": 8.456198525348107e-08, |
|
"loss": 0.0026, |
|
"step": 19700 |
|
}, |
|
{ |
|
"epoch": 2.401837769808717, |
|
"grad_norm": 0.2202480435371399, |
|
"learning_rate": 8.442719074770512e-08, |
|
"loss": 0.0024, |
|
"step": 19800 |
|
}, |
|
{ |
|
"epoch": 2.41396826359563, |
|
"grad_norm": 0.09995169192552567, |
|
"learning_rate": 8.429239624192917e-08, |
|
"loss": 0.0023, |
|
"step": 19900 |
|
}, |
|
{ |
|
"epoch": 2.4260987573825425, |
|
"grad_norm": 0.20967301726341248, |
|
"learning_rate": 8.415760173615322e-08, |
|
"loss": 0.0022, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.4260987573825425, |
|
"eval_loss": 0.0021133332047611475, |
|
"eval_runtime": 12677.9868, |
|
"eval_samples_per_second": 33.292, |
|
"eval_steps_per_second": 4.162, |
|
"step": 20000 |
|
}, |
|
{ |
|
"epoch": 2.4382292511694557, |
|
"grad_norm": 0.26037752628326416, |
|
"learning_rate": 8.402280723037729e-08, |
|
"loss": 0.0021, |
|
"step": 20100 |
|
}, |
|
{ |
|
"epoch": 2.4503597449563683, |
|
"grad_norm": 0.2062160074710846, |
|
"learning_rate": 8.388801272460134e-08, |
|
"loss": 0.0019, |
|
"step": 20200 |
|
}, |
|
{ |
|
"epoch": 2.462490238743281, |
|
"grad_norm": 0.14383727312088013, |
|
"learning_rate": 8.37532182188254e-08, |
|
"loss": 0.0018, |
|
"step": 20300 |
|
}, |
|
{ |
|
"epoch": 2.4746207325301937, |
|
"grad_norm": 0.15886737406253815, |
|
"learning_rate": 8.361842371304946e-08, |
|
"loss": 0.0017, |
|
"step": 20400 |
|
}, |
|
{ |
|
"epoch": 2.4867512263171063, |
|
"grad_norm": 0.11849108338356018, |
|
"learning_rate": 8.348362920727351e-08, |
|
"loss": 0.0016, |
|
"step": 20500 |
|
}, |
|
{ |
|
"epoch": 2.498881720104019, |
|
"grad_norm": 0.14686524868011475, |
|
"learning_rate": 8.334883470149757e-08, |
|
"loss": 0.0015, |
|
"step": 20600 |
|
}, |
|
{ |
|
"epoch": 2.5110122138909317, |
|
"grad_norm": 0.20525987446308136, |
|
"learning_rate": 8.321404019572162e-08, |
|
"loss": 0.0015, |
|
"step": 20700 |
|
}, |
|
{ |
|
"epoch": 2.5231427076778443, |
|
"grad_norm": 0.23715050518512726, |
|
"learning_rate": 8.307924568994567e-08, |
|
"loss": 0.0014, |
|
"step": 20800 |
|
}, |
|
{ |
|
"epoch": 2.535273201464757, |
|
"grad_norm": 0.16575060784816742, |
|
"learning_rate": 8.294445118416972e-08, |
|
"loss": 0.0013, |
|
"step": 20900 |
|
}, |
|
{ |
|
"epoch": 2.5474036952516697, |
|
"grad_norm": 0.21624301373958588, |
|
"learning_rate": 8.280965667839378e-08, |
|
"loss": 0.0012, |
|
"step": 21000 |
|
}, |
|
{ |
|
"epoch": 2.5595341890385823, |
|
"grad_norm": 0.2248183786869049, |
|
"learning_rate": 8.267486217261784e-08, |
|
"loss": 0.0012, |
|
"step": 21100 |
|
}, |
|
{ |
|
"epoch": 2.5716646828254954, |
|
"grad_norm": 0.09357228130102158, |
|
"learning_rate": 8.25400676668419e-08, |
|
"loss": 0.0011, |
|
"step": 21200 |
|
}, |
|
{ |
|
"epoch": 2.583795176612408, |
|
"grad_norm": 0.14724239706993103, |
|
"learning_rate": 8.240527316106595e-08, |
|
"loss": 0.001, |
|
"step": 21300 |
|
}, |
|
{ |
|
"epoch": 2.5959256703993208, |
|
"grad_norm": 0.09311047941446304, |
|
"learning_rate": 8.227047865529001e-08, |
|
"loss": 0.001, |
|
"step": 21400 |
|
}, |
|
{ |
|
"epoch": 2.6080561641862334, |
|
"grad_norm": 0.13593658804893494, |
|
"learning_rate": 8.213568414951407e-08, |
|
"loss": 0.0009, |
|
"step": 21500 |
|
}, |
|
{ |
|
"epoch": 2.620186657973146, |
|
"grad_norm": 0.12716355919837952, |
|
"learning_rate": 8.200088964373812e-08, |
|
"loss": 0.0009, |
|
"step": 21600 |
|
}, |
|
{ |
|
"epoch": 2.6323171517600588, |
|
"grad_norm": 0.09387937188148499, |
|
"learning_rate": 8.186609513796217e-08, |
|
"loss": 0.0008, |
|
"step": 21700 |
|
}, |
|
{ |
|
"epoch": 2.6444476455469714, |
|
"grad_norm": 0.09374915808439255, |
|
"learning_rate": 8.173130063218623e-08, |
|
"loss": 0.0008, |
|
"step": 21800 |
|
}, |
|
{ |
|
"epoch": 2.6565781393338845, |
|
"grad_norm": 0.09871383756399155, |
|
"learning_rate": 8.159650612641028e-08, |
|
"loss": 0.0008, |
|
"step": 21900 |
|
}, |
|
{ |
|
"epoch": 2.668708633120797, |
|
"grad_norm": 0.20628078281879425, |
|
"learning_rate": 8.146171162063434e-08, |
|
"loss": 0.0007, |
|
"step": 22000 |
|
}, |
|
{ |
|
"epoch": 2.68083912690771, |
|
"grad_norm": 0.06819931417703629, |
|
"learning_rate": 8.13269171148584e-08, |
|
"loss": 0.0007, |
|
"step": 22100 |
|
}, |
|
{ |
|
"epoch": 2.6929696206946225, |
|
"grad_norm": 0.09931056201457977, |
|
"learning_rate": 8.119212260908245e-08, |
|
"loss": 0.0006, |
|
"step": 22200 |
|
}, |
|
{ |
|
"epoch": 2.705100114481535, |
|
"grad_norm": 0.11566577851772308, |
|
"learning_rate": 8.105732810330652e-08, |
|
"loss": 0.0006, |
|
"step": 22300 |
|
}, |
|
{ |
|
"epoch": 2.717230608268448, |
|
"grad_norm": 0.06021908298134804, |
|
"learning_rate": 8.092253359753057e-08, |
|
"loss": 0.0006, |
|
"step": 22400 |
|
}, |
|
{ |
|
"epoch": 2.7293611020553605, |
|
"grad_norm": 0.07083894312381744, |
|
"learning_rate": 8.078773909175462e-08, |
|
"loss": 0.0005, |
|
"step": 22500 |
|
}, |
|
{ |
|
"epoch": 2.741491595842273, |
|
"grad_norm": 0.036980826407670975, |
|
"learning_rate": 8.065294458597867e-08, |
|
"loss": 0.0005, |
|
"step": 22600 |
|
}, |
|
{ |
|
"epoch": 2.753622089629186, |
|
"grad_norm": 0.15954025089740753, |
|
"learning_rate": 8.051815008020273e-08, |
|
"loss": 0.0005, |
|
"step": 22700 |
|
}, |
|
{ |
|
"epoch": 2.7657525834160985, |
|
"grad_norm": 0.08207129687070847, |
|
"learning_rate": 8.038335557442678e-08, |
|
"loss": 0.0005, |
|
"step": 22800 |
|
}, |
|
{ |
|
"epoch": 2.777883077203011, |
|
"grad_norm": 0.05309203267097473, |
|
"learning_rate": 8.024856106865083e-08, |
|
"loss": 0.0004, |
|
"step": 22900 |
|
}, |
|
{ |
|
"epoch": 2.790013570989924, |
|
"grad_norm": 0.06624484062194824, |
|
"learning_rate": 8.01137665628749e-08, |
|
"loss": 0.0004, |
|
"step": 23000 |
|
}, |
|
{ |
|
"epoch": 2.802144064776837, |
|
"grad_norm": 0.049933817237615585, |
|
"learning_rate": 7.997897205709895e-08, |
|
"loss": 0.0004, |
|
"step": 23100 |
|
}, |
|
{ |
|
"epoch": 2.8142745585637496, |
|
"grad_norm": 0.04547886550426483, |
|
"learning_rate": 7.9844177551323e-08, |
|
"loss": 0.0004, |
|
"step": 23200 |
|
}, |
|
{ |
|
"epoch": 2.8264050523506623, |
|
"grad_norm": 0.049001339823007584, |
|
"learning_rate": 7.970938304554707e-08, |
|
"loss": 0.0004, |
|
"step": 23300 |
|
}, |
|
{ |
|
"epoch": 2.838535546137575, |
|
"grad_norm": 0.04988383874297142, |
|
"learning_rate": 7.957458853977112e-08, |
|
"loss": 0.0003, |
|
"step": 23400 |
|
}, |
|
{ |
|
"epoch": 2.8506660399244876, |
|
"grad_norm": 0.02339191362261772, |
|
"learning_rate": 7.943979403399516e-08, |
|
"loss": 0.0003, |
|
"step": 23500 |
|
}, |
|
{ |
|
"epoch": 2.8627965337114003, |
|
"grad_norm": 0.06541607528924942, |
|
"learning_rate": 7.930499952821923e-08, |
|
"loss": 0.0003, |
|
"step": 23600 |
|
}, |
|
{ |
|
"epoch": 2.874927027498313, |
|
"grad_norm": 0.04095012694597244, |
|
"learning_rate": 7.917020502244328e-08, |
|
"loss": 0.0003, |
|
"step": 23700 |
|
}, |
|
{ |
|
"epoch": 2.887057521285226, |
|
"grad_norm": 0.031226731836795807, |
|
"learning_rate": 7.903541051666733e-08, |
|
"loss": 0.0003, |
|
"step": 23800 |
|
}, |
|
{ |
|
"epoch": 2.8991880150721387, |
|
"grad_norm": 0.0459061898291111, |
|
"learning_rate": 7.89006160108914e-08, |
|
"loss": 0.0003, |
|
"step": 23900 |
|
}, |
|
{ |
|
"epoch": 2.9113185088590514, |
|
"grad_norm": 0.016833819448947906, |
|
"learning_rate": 7.876582150511545e-08, |
|
"loss": 0.0003, |
|
"step": 24000 |
|
}, |
|
{ |
|
"epoch": 2.923449002645964, |
|
"grad_norm": 0.020327765494585037, |
|
"learning_rate": 7.86310269993395e-08, |
|
"loss": 0.0002, |
|
"step": 24100 |
|
}, |
|
{ |
|
"epoch": 2.9355794964328767, |
|
"grad_norm": 0.020058810710906982, |
|
"learning_rate": 7.849623249356357e-08, |
|
"loss": 0.0002, |
|
"step": 24200 |
|
}, |
|
{ |
|
"epoch": 2.9477099902197894, |
|
"grad_norm": 0.011464670300483704, |
|
"learning_rate": 7.836143798778762e-08, |
|
"loss": 0.0002, |
|
"step": 24300 |
|
}, |
|
{ |
|
"epoch": 2.959840484006702, |
|
"grad_norm": 0.008502807468175888, |
|
"learning_rate": 7.822664348201166e-08, |
|
"loss": 0.0002, |
|
"step": 24400 |
|
}, |
|
{ |
|
"epoch": 2.9719709777936147, |
|
"grad_norm": 0.007184523623436689, |
|
"learning_rate": 7.809184897623573e-08, |
|
"loss": 0.0002, |
|
"step": 24500 |
|
}, |
|
{ |
|
"epoch": 2.9841014715805274, |
|
"grad_norm": 0.008515238761901855, |
|
"learning_rate": 7.795705447045978e-08, |
|
"loss": 0.0002, |
|
"step": 24600 |
|
}, |
|
{ |
|
"epoch": 2.99623196536744, |
|
"grad_norm": 0.006969008129090071, |
|
"learning_rate": 7.782225996468383e-08, |
|
"loss": 0.0002, |
|
"step": 24700 |
|
}, |
|
{ |
|
"epoch": 3.0083624591543527, |
|
"grad_norm": 0.017829405143857002, |
|
"learning_rate": 7.768746545890788e-08, |
|
"loss": 0.0002, |
|
"step": 24800 |
|
}, |
|
{ |
|
"epoch": 3.020492952941266, |
|
"grad_norm": 0.006673099938780069, |
|
"learning_rate": 7.755267095313195e-08, |
|
"loss": 0.0002, |
|
"step": 24900 |
|
}, |
|
{ |
|
"epoch": 3.0326234467281785, |
|
"grad_norm": 0.04286098852753639, |
|
"learning_rate": 7.7417876447356e-08, |
|
"loss": 0.0001, |
|
"step": 25000 |
|
}, |
|
{ |
|
"epoch": 3.044753940515091, |
|
"grad_norm": 0.012195469811558723, |
|
"learning_rate": 7.728308194158006e-08, |
|
"loss": 0.0001, |
|
"step": 25100 |
|
}, |
|
{ |
|
"epoch": 3.056884434302004, |
|
"grad_norm": 0.02029520832002163, |
|
"learning_rate": 7.714828743580412e-08, |
|
"loss": 0.0001, |
|
"step": 25200 |
|
}, |
|
{ |
|
"epoch": 3.0690149280889165, |
|
"grad_norm": 0.02022946998476982, |
|
"learning_rate": 7.701349293002816e-08, |
|
"loss": 0.0001, |
|
"step": 25300 |
|
}, |
|
{ |
|
"epoch": 3.081145421875829, |
|
"grad_norm": 0.01885395683348179, |
|
"learning_rate": 7.687869842425221e-08, |
|
"loss": 0.0001, |
|
"step": 25400 |
|
}, |
|
{ |
|
"epoch": 3.093275915662742, |
|
"grad_norm": 0.020423822104930878, |
|
"learning_rate": 7.674390391847628e-08, |
|
"loss": 0.0001, |
|
"step": 25500 |
|
}, |
|
{ |
|
"epoch": 3.1054064094496545, |
|
"grad_norm": 0.012373251840472221, |
|
"learning_rate": 7.660910941270033e-08, |
|
"loss": 0.0001, |
|
"step": 25600 |
|
}, |
|
{ |
|
"epoch": 3.117536903236567, |
|
"grad_norm": 0.010483508929610252, |
|
"learning_rate": 7.647431490692439e-08, |
|
"loss": 0.0001, |
|
"step": 25700 |
|
}, |
|
{ |
|
"epoch": 3.1296673970234803, |
|
"grad_norm": 0.012648390606045723, |
|
"learning_rate": 7.633952040114845e-08, |
|
"loss": 0.0001, |
|
"step": 25800 |
|
}, |
|
{ |
|
"epoch": 3.141797890810393, |
|
"grad_norm": 0.003789283335208893, |
|
"learning_rate": 7.62047258953725e-08, |
|
"loss": 0.0001, |
|
"step": 25900 |
|
}, |
|
{ |
|
"epoch": 3.1539283845973056, |
|
"grad_norm": 0.003961450420320034, |
|
"learning_rate": 7.606993138959656e-08, |
|
"loss": 0.0001, |
|
"step": 26000 |
|
}, |
|
{ |
|
"epoch": 3.1660588783842183, |
|
"grad_norm": 0.005319498013705015, |
|
"learning_rate": 7.593513688382062e-08, |
|
"loss": 0.0001, |
|
"step": 26100 |
|
}, |
|
{ |
|
"epoch": 3.178189372171131, |
|
"grad_norm": 0.0033047376200556755, |
|
"learning_rate": 7.580034237804467e-08, |
|
"loss": 0.0001, |
|
"step": 26200 |
|
}, |
|
{ |
|
"epoch": 3.1903198659580436, |
|
"grad_norm": 0.019682567566633224, |
|
"learning_rate": 7.566554787226871e-08, |
|
"loss": 0.0001, |
|
"step": 26300 |
|
}, |
|
{ |
|
"epoch": 3.2024503597449563, |
|
"grad_norm": 0.015115097165107727, |
|
"learning_rate": 7.553075336649278e-08, |
|
"loss": 0.0001, |
|
"step": 26400 |
|
}, |
|
{ |
|
"epoch": 3.214580853531869, |
|
"grad_norm": 0.004491306375712156, |
|
"learning_rate": 7.539595886071683e-08, |
|
"loss": 0.0001, |
|
"step": 26500 |
|
}, |
|
{ |
|
"epoch": 3.2267113473187816, |
|
"grad_norm": 0.0067758746445178986, |
|
"learning_rate": 7.526116435494089e-08, |
|
"loss": 0.0001, |
|
"step": 26600 |
|
}, |
|
{ |
|
"epoch": 3.2388418411056943, |
|
"grad_norm": 0.009860140271484852, |
|
"learning_rate": 7.512636984916495e-08, |
|
"loss": 0.0001, |
|
"step": 26700 |
|
}, |
|
{ |
|
"epoch": 3.2509723348926074, |
|
"grad_norm": 0.00978156179189682, |
|
"learning_rate": 7.4991575343389e-08, |
|
"loss": 0.0001, |
|
"step": 26800 |
|
}, |
|
{ |
|
"epoch": 3.26310282867952, |
|
"grad_norm": 0.012902422808110714, |
|
"learning_rate": 7.485678083761306e-08, |
|
"loss": 0.0001, |
|
"step": 26900 |
|
}, |
|
{ |
|
"epoch": 3.2752333224664327, |
|
"grad_norm": 0.004666306544095278, |
|
"learning_rate": 7.472198633183711e-08, |
|
"loss": 0.0, |
|
"step": 27000 |
|
}, |
|
{ |
|
"epoch": 3.2873638162533454, |
|
"grad_norm": 0.00277140736579895, |
|
"learning_rate": 7.458719182606118e-08, |
|
"loss": 0.0, |
|
"step": 27100 |
|
}, |
|
{ |
|
"epoch": 3.299494310040258, |
|
"grad_norm": 0.0019914316944777966, |
|
"learning_rate": 7.445239732028521e-08, |
|
"loss": 0.0, |
|
"step": 27200 |
|
}, |
|
{ |
|
"epoch": 3.3116248038271707, |
|
"grad_norm": 0.0016523301601409912, |
|
"learning_rate": 7.431760281450927e-08, |
|
"loss": 0.0, |
|
"step": 27300 |
|
}, |
|
{ |
|
"epoch": 3.3237552976140834, |
|
"grad_norm": 0.003063188400119543, |
|
"learning_rate": 7.418280830873333e-08, |
|
"loss": 0.0, |
|
"step": 27400 |
|
}, |
|
{ |
|
"epoch": 3.335885791400996, |
|
"grad_norm": 0.0045642610639333725, |
|
"learning_rate": 7.404801380295739e-08, |
|
"loss": 0.0, |
|
"step": 27500 |
|
}, |
|
{ |
|
"epoch": 3.348016285187909, |
|
"grad_norm": 0.005651027895510197, |
|
"learning_rate": 7.391321929718144e-08, |
|
"loss": 0.0, |
|
"step": 27600 |
|
}, |
|
{ |
|
"epoch": 3.360146778974822, |
|
"grad_norm": 0.005226987414062023, |
|
"learning_rate": 7.37784247914055e-08, |
|
"loss": 0.0, |
|
"step": 27700 |
|
}, |
|
{ |
|
"epoch": 3.3722772727617345, |
|
"grad_norm": 0.0039469217881560326, |
|
"learning_rate": 7.364363028562956e-08, |
|
"loss": 0.0, |
|
"step": 27800 |
|
}, |
|
{ |
|
"epoch": 3.384407766548647, |
|
"grad_norm": 0.0011639875592663884, |
|
"learning_rate": 7.350883577985361e-08, |
|
"loss": 0.0, |
|
"step": 27900 |
|
}, |
|
{ |
|
"epoch": 3.39653826033556, |
|
"grad_norm": 0.002607525559142232, |
|
"learning_rate": 7.337404127407768e-08, |
|
"loss": 0.0, |
|
"step": 28000 |
|
}, |
|
{ |
|
"epoch": 3.4086687541224725, |
|
"grad_norm": 0.0024226950481534004, |
|
"learning_rate": 7.323924676830172e-08, |
|
"loss": 0.0, |
|
"step": 28100 |
|
}, |
|
{ |
|
"epoch": 3.420799247909385, |
|
"grad_norm": 0.008228462189435959, |
|
"learning_rate": 7.310445226252577e-08, |
|
"loss": 0.0, |
|
"step": 28200 |
|
}, |
|
{ |
|
"epoch": 3.432929741696298, |
|
"grad_norm": 0.0010171543108299375, |
|
"learning_rate": 7.296965775674983e-08, |
|
"loss": 0.0, |
|
"step": 28300 |
|
}, |
|
{ |
|
"epoch": 3.4450602354832105, |
|
"grad_norm": 0.0010841701878234744, |
|
"learning_rate": 7.283486325097389e-08, |
|
"loss": 0.0, |
|
"step": 28400 |
|
}, |
|
{ |
|
"epoch": 3.457190729270123, |
|
"grad_norm": 0.0019339750288054347, |
|
"learning_rate": 7.270006874519794e-08, |
|
"loss": 0.0, |
|
"step": 28500 |
|
}, |
|
{ |
|
"epoch": 3.469321223057036, |
|
"grad_norm": 0.0008356723701581359, |
|
"learning_rate": 7.2565274239422e-08, |
|
"loss": 0.0, |
|
"step": 28600 |
|
}, |
|
{ |
|
"epoch": 3.481451716843949, |
|
"grad_norm": 0.0033976007252931595, |
|
"learning_rate": 7.243047973364606e-08, |
|
"loss": 0.0, |
|
"step": 28700 |
|
}, |
|
{ |
|
"epoch": 3.4935822106308616, |
|
"grad_norm": 0.0017918187659233809, |
|
"learning_rate": 7.229568522787011e-08, |
|
"loss": 0.0, |
|
"step": 28800 |
|
}, |
|
{ |
|
"epoch": 3.5057127044177743, |
|
"grad_norm": 0.000810507161077112, |
|
"learning_rate": 7.216089072209416e-08, |
|
"loss": 0.0, |
|
"step": 28900 |
|
}, |
|
{ |
|
"epoch": 3.517843198204687, |
|
"grad_norm": 0.0006327140727080405, |
|
"learning_rate": 7.202609621631822e-08, |
|
"loss": 0.0, |
|
"step": 29000 |
|
}, |
|
{ |
|
"epoch": 3.5299736919915996, |
|
"grad_norm": 0.0008019423694349825, |
|
"learning_rate": 7.189130171054227e-08, |
|
"loss": 0.0, |
|
"step": 29100 |
|
}, |
|
{ |
|
"epoch": 3.5421041857785123, |
|
"grad_norm": 0.001055610366165638, |
|
"learning_rate": 7.175650720476632e-08, |
|
"loss": 0.0, |
|
"step": 29200 |
|
}, |
|
{ |
|
"epoch": 3.554234679565425, |
|
"grad_norm": 0.0005360008217394352, |
|
"learning_rate": 7.162171269899039e-08, |
|
"loss": 0.0, |
|
"step": 29300 |
|
}, |
|
{ |
|
"epoch": 3.566365173352338, |
|
"grad_norm": 0.0012855017557740211, |
|
"learning_rate": 7.148691819321444e-08, |
|
"loss": 0.0, |
|
"step": 29400 |
|
}, |
|
{ |
|
"epoch": 3.5784956671392507, |
|
"grad_norm": 0.0014753304421901703, |
|
"learning_rate": 7.135212368743849e-08, |
|
"loss": 0.0, |
|
"step": 29500 |
|
}, |
|
{ |
|
"epoch": 3.5906261609261634, |
|
"grad_norm": 0.0004512036102823913, |
|
"learning_rate": 7.121732918166256e-08, |
|
"loss": 0.0, |
|
"step": 29600 |
|
}, |
|
{ |
|
"epoch": 3.602756654713076, |
|
"grad_norm": 0.0014754869043827057, |
|
"learning_rate": 7.108253467588661e-08, |
|
"loss": 0.0, |
|
"step": 29700 |
|
}, |
|
{ |
|
"epoch": 3.6148871484999887, |
|
"grad_norm": 0.0005086124874651432, |
|
"learning_rate": 7.094774017011066e-08, |
|
"loss": 0.0, |
|
"step": 29800 |
|
}, |
|
{ |
|
"epoch": 3.6270176422869014, |
|
"grad_norm": 0.0010658778483048081, |
|
"learning_rate": 7.081294566433472e-08, |
|
"loss": 0.0, |
|
"step": 29900 |
|
}, |
|
{ |
|
"epoch": 3.639148136073814, |
|
"grad_norm": 0.00038994685746729374, |
|
"learning_rate": 7.067815115855877e-08, |
|
"loss": 0.0, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.639148136073814, |
|
"eval_loss": 8.61085754877422e-06, |
|
"eval_runtime": 13089.3428, |
|
"eval_samples_per_second": 32.246, |
|
"eval_steps_per_second": 4.031, |
|
"step": 30000 |
|
}, |
|
{ |
|
"epoch": 3.6512786298607267, |
|
"grad_norm": 0.0005141702713444829, |
|
"learning_rate": 7.054335665278282e-08, |
|
"loss": 0.0, |
|
"step": 30100 |
|
}, |
|
{ |
|
"epoch": 3.6634091236476394, |
|
"grad_norm": 0.001066096592694521, |
|
"learning_rate": 7.040856214700689e-08, |
|
"loss": 0.0, |
|
"step": 30200 |
|
}, |
|
{ |
|
"epoch": 3.675539617434552, |
|
"grad_norm": 0.0003840310382656753, |
|
"learning_rate": 7.027376764123094e-08, |
|
"loss": 0.0, |
|
"step": 30300 |
|
}, |
|
{ |
|
"epoch": 3.6876701112214647, |
|
"grad_norm": 0.0003469325019977987, |
|
"learning_rate": 7.013897313545499e-08, |
|
"loss": 0.0, |
|
"step": 30400 |
|
}, |
|
{ |
|
"epoch": 3.6998006050083774, |
|
"grad_norm": 0.0008366837282665074, |
|
"learning_rate": 7.000417862967906e-08, |
|
"loss": 0.0, |
|
"step": 30500 |
|
}, |
|
{ |
|
"epoch": 3.7119310987952905, |
|
"grad_norm": 0.00028104009106755257, |
|
"learning_rate": 6.986938412390311e-08, |
|
"loss": 0.0, |
|
"step": 30600 |
|
}, |
|
{ |
|
"epoch": 3.724061592582203, |
|
"grad_norm": 0.0009309325832873583, |
|
"learning_rate": 6.973458961812716e-08, |
|
"loss": 0.0, |
|
"step": 30700 |
|
}, |
|
{ |
|
"epoch": 3.736192086369116, |
|
"grad_norm": 0.00024238611513283104, |
|
"learning_rate": 6.959979511235122e-08, |
|
"loss": 0.0, |
|
"step": 30800 |
|
}, |
|
{ |
|
"epoch": 3.7483225801560285, |
|
"grad_norm": 0.00021373844356276095, |
|
"learning_rate": 6.946500060657527e-08, |
|
"loss": 0.0, |
|
"step": 30900 |
|
}, |
|
{ |
|
"epoch": 3.760453073942941, |
|
"grad_norm": 0.0007159899105317891, |
|
"learning_rate": 6.933020610079932e-08, |
|
"loss": 0.0, |
|
"step": 31000 |
|
}, |
|
{ |
|
"epoch": 3.772583567729854, |
|
"grad_norm": 0.0006215888424776495, |
|
"learning_rate": 6.919541159502337e-08, |
|
"loss": 0.0, |
|
"step": 31100 |
|
}, |
|
{ |
|
"epoch": 3.7847140615167665, |
|
"grad_norm": 0.0001826356747187674, |
|
"learning_rate": 6.906061708924744e-08, |
|
"loss": 0.0, |
|
"step": 31200 |
|
}, |
|
{ |
|
"epoch": 3.7968445553036796, |
|
"grad_norm": 0.0002084925799863413, |
|
"learning_rate": 6.892582258347149e-08, |
|
"loss": 0.0, |
|
"step": 31300 |
|
}, |
|
{ |
|
"epoch": 3.8089750490905923, |
|
"grad_norm": 0.00020457223581615835, |
|
"learning_rate": 6.879102807769555e-08, |
|
"loss": 0.0, |
|
"step": 31400 |
|
}, |
|
{ |
|
"epoch": 3.821105542877505, |
|
"grad_norm": 0.00044186966260895133, |
|
"learning_rate": 6.865623357191961e-08, |
|
"loss": 0.0, |
|
"step": 31500 |
|
}, |
|
{ |
|
"epoch": 3.8332360366644176, |
|
"grad_norm": 0.00029807299142703414, |
|
"learning_rate": 6.852143906614366e-08, |
|
"loss": 0.0, |
|
"step": 31600 |
|
}, |
|
{ |
|
"epoch": 3.8453665304513303, |
|
"grad_norm": 0.0004684592713601887, |
|
"learning_rate": 6.838664456036772e-08, |
|
"loss": 0.0, |
|
"step": 31700 |
|
}, |
|
{ |
|
"epoch": 3.857497024238243, |
|
"grad_norm": 0.00024593668058514595, |
|
"learning_rate": 6.825185005459177e-08, |
|
"loss": 0.0, |
|
"step": 31800 |
|
}, |
|
{ |
|
"epoch": 3.8696275180251556, |
|
"grad_norm": 0.0001907894911710173, |
|
"learning_rate": 6.811705554881582e-08, |
|
"loss": 0.0, |
|
"step": 31900 |
|
}, |
|
{ |
|
"epoch": 3.8817580118120683, |
|
"grad_norm": 0.0001471816358389333, |
|
"learning_rate": 6.798226104303987e-08, |
|
"loss": 0.0, |
|
"step": 32000 |
|
}, |
|
{ |
|
"epoch": 3.893888505598981, |
|
"grad_norm": 0.0004228481266181916, |
|
"learning_rate": 6.784746653726394e-08, |
|
"loss": 0.0, |
|
"step": 32100 |
|
}, |
|
{ |
|
"epoch": 3.9060189993858936, |
|
"grad_norm": 0.00015706397243775427, |
|
"learning_rate": 6.7712672031488e-08, |
|
"loss": 0.0, |
|
"step": 32200 |
|
}, |
|
{ |
|
"epoch": 3.9181494931728063, |
|
"grad_norm": 0.00015203200746327639, |
|
"learning_rate": 6.757787752571205e-08, |
|
"loss": 0.0, |
|
"step": 32300 |
|
}, |
|
{ |
|
"epoch": 3.930279986959719, |
|
"grad_norm": 0.00012529987725429237, |
|
"learning_rate": 6.744308301993611e-08, |
|
"loss": 0.0, |
|
"step": 32400 |
|
}, |
|
{ |
|
"epoch": 3.942410480746632, |
|
"grad_norm": 0.00023667830100748688, |
|
"learning_rate": 6.730828851416016e-08, |
|
"loss": 0.0, |
|
"step": 32500 |
|
}, |
|
{ |
|
"epoch": 3.9545409745335447, |
|
"grad_norm": 0.0003280766832176596, |
|
"learning_rate": 6.717349400838422e-08, |
|
"loss": 0.0, |
|
"step": 32600 |
|
}, |
|
{ |
|
"epoch": 3.9666714683204574, |
|
"grad_norm": 0.0003750512842088938, |
|
"learning_rate": 6.703869950260827e-08, |
|
"loss": 0.0, |
|
"step": 32700 |
|
}, |
|
{ |
|
"epoch": 3.97880196210737, |
|
"grad_norm": 0.00031201005913317204, |
|
"learning_rate": 6.690390499683232e-08, |
|
"loss": 0.0, |
|
"step": 32800 |
|
}, |
|
{ |
|
"epoch": 3.9909324558942827, |
|
"grad_norm": 0.00028819395811297, |
|
"learning_rate": 6.676911049105638e-08, |
|
"loss": 0.0, |
|
"step": 32900 |
|
}, |
|
{ |
|
"epoch": 4.003062949681196, |
|
"grad_norm": 0.00012892342056147754, |
|
"learning_rate": 6.663431598528043e-08, |
|
"loss": 0.0, |
|
"step": 33000 |
|
}, |
|
{ |
|
"epoch": 4.0151934434681085, |
|
"grad_norm": 7.572331378469244e-05, |
|
"learning_rate": 6.64995214795045e-08, |
|
"loss": 0.0, |
|
"step": 33100 |
|
}, |
|
{ |
|
"epoch": 4.027323937255021, |
|
"grad_norm": 7.666134479222819e-05, |
|
"learning_rate": 6.636472697372855e-08, |
|
"loss": 0.0, |
|
"step": 33200 |
|
}, |
|
{ |
|
"epoch": 4.039454431041934, |
|
"grad_norm": 7.41102485335432e-05, |
|
"learning_rate": 6.62299324679526e-08, |
|
"loss": 0.0, |
|
"step": 33300 |
|
}, |
|
{ |
|
"epoch": 4.0515849248288465, |
|
"grad_norm": 5.6044456869130954e-05, |
|
"learning_rate": 6.609513796217667e-08, |
|
"loss": 0.0, |
|
"step": 33400 |
|
}, |
|
{ |
|
"epoch": 4.063715418615759, |
|
"grad_norm": 7.278579141711816e-05, |
|
"learning_rate": 6.596034345640072e-08, |
|
"loss": 0.0, |
|
"step": 33500 |
|
}, |
|
{ |
|
"epoch": 4.075845912402672, |
|
"grad_norm": 0.00030291761504486203, |
|
"learning_rate": 6.582554895062477e-08, |
|
"loss": 0.0, |
|
"step": 33600 |
|
}, |
|
{ |
|
"epoch": 4.0879764061895845, |
|
"grad_norm": 0.00017951276095118374, |
|
"learning_rate": 6.569075444484882e-08, |
|
"loss": 0.0, |
|
"step": 33700 |
|
}, |
|
{ |
|
"epoch": 4.100106899976497, |
|
"grad_norm": 5.9736263210652396e-05, |
|
"learning_rate": 6.555595993907288e-08, |
|
"loss": 0.0, |
|
"step": 33800 |
|
}, |
|
{ |
|
"epoch": 4.11223739376341, |
|
"grad_norm": 0.00021180949988774955, |
|
"learning_rate": 6.542116543329693e-08, |
|
"loss": 0.0, |
|
"step": 33900 |
|
}, |
|
{ |
|
"epoch": 4.1243678875503225, |
|
"grad_norm": 0.00012666590919252485, |
|
"learning_rate": 6.5286370927521e-08, |
|
"loss": 0.0, |
|
"step": 34000 |
|
}, |
|
{ |
|
"epoch": 4.136498381337235, |
|
"grad_norm": 4.756751877721399e-05, |
|
"learning_rate": 6.515157642174505e-08, |
|
"loss": 0.0, |
|
"step": 34100 |
|
}, |
|
{ |
|
"epoch": 4.148628875124148, |
|
"grad_norm": 3.8059039070503786e-05, |
|
"learning_rate": 6.50167819159691e-08, |
|
"loss": 0.0, |
|
"step": 34200 |
|
}, |
|
{ |
|
"epoch": 4.1607593689110605, |
|
"grad_norm": 6.0839298384962603e-05, |
|
"learning_rate": 6.488198741019317e-08, |
|
"loss": 0.0, |
|
"step": 34300 |
|
}, |
|
{ |
|
"epoch": 4.172889862697973, |
|
"grad_norm": 0.0002114167291438207, |
|
"learning_rate": 6.474719290441722e-08, |
|
"loss": 0.0, |
|
"step": 34400 |
|
}, |
|
{ |
|
"epoch": 4.185020356484886, |
|
"grad_norm": 0.00015508649812545627, |
|
"learning_rate": 6.461239839864127e-08, |
|
"loss": 0.0, |
|
"step": 34500 |
|
}, |
|
{ |
|
"epoch": 4.1971508502717985, |
|
"grad_norm": 7.546142296632752e-05, |
|
"learning_rate": 6.447760389286532e-08, |
|
"loss": 0.0, |
|
"step": 34600 |
|
}, |
|
{ |
|
"epoch": 4.209281344058712, |
|
"grad_norm": 3.271881359978579e-05, |
|
"learning_rate": 6.434280938708938e-08, |
|
"loss": 0.0, |
|
"step": 34700 |
|
}, |
|
{ |
|
"epoch": 4.221411837845625, |
|
"grad_norm": 0.0001592914341017604, |
|
"learning_rate": 6.420801488131343e-08, |
|
"loss": 0.0, |
|
"step": 34800 |
|
}, |
|
{ |
|
"epoch": 4.233542331632537, |
|
"grad_norm": 5.735379454563372e-05, |
|
"learning_rate": 6.40732203755375e-08, |
|
"loss": 0.0, |
|
"step": 34900 |
|
}, |
|
{ |
|
"epoch": 4.24567282541945, |
|
"grad_norm": 0.00011750426347134635, |
|
"learning_rate": 6.393842586976155e-08, |
|
"loss": 0.0, |
|
"step": 35000 |
|
}, |
|
{ |
|
"epoch": 4.257803319206363, |
|
"grad_norm": 8.914129284676164e-05, |
|
"learning_rate": 6.38036313639856e-08, |
|
"loss": 0.0, |
|
"step": 35100 |
|
}, |
|
{ |
|
"epoch": 4.269933812993275, |
|
"grad_norm": 2.7345347916707397e-05, |
|
"learning_rate": 6.366883685820965e-08, |
|
"loss": 0.0, |
|
"step": 35200 |
|
}, |
|
{ |
|
"epoch": 4.282064306780188, |
|
"grad_norm": 4.532103048404679e-05, |
|
"learning_rate": 6.353404235243372e-08, |
|
"loss": 0.0, |
|
"step": 35300 |
|
}, |
|
{ |
|
"epoch": 4.294194800567101, |
|
"grad_norm": 5.636207788484171e-05, |
|
"learning_rate": 6.339924784665777e-08, |
|
"loss": 0.0, |
|
"step": 35400 |
|
}, |
|
{ |
|
"epoch": 4.306325294354013, |
|
"grad_norm": 0.00013345239858608693, |
|
"learning_rate": 6.326445334088182e-08, |
|
"loss": 0.0, |
|
"step": 35500 |
|
}, |
|
{ |
|
"epoch": 4.318455788140926, |
|
"grad_norm": 8.585578325437382e-05, |
|
"learning_rate": 6.312965883510588e-08, |
|
"loss": 0.0, |
|
"step": 35600 |
|
}, |
|
{ |
|
"epoch": 4.330586281927839, |
|
"grad_norm": 2.5150986402877606e-05, |
|
"learning_rate": 6.299486432932993e-08, |
|
"loss": 0.0, |
|
"step": 35700 |
|
}, |
|
{ |
|
"epoch": 4.342716775714751, |
|
"grad_norm": 3.5774806747213006e-05, |
|
"learning_rate": 6.286006982355398e-08, |
|
"loss": 0.0, |
|
"step": 35800 |
|
}, |
|
{ |
|
"epoch": 4.354847269501664, |
|
"grad_norm": 4.836301377508789e-05, |
|
"learning_rate": 6.272527531777805e-08, |
|
"loss": 0.0, |
|
"step": 35900 |
|
}, |
|
{ |
|
"epoch": 4.366977763288577, |
|
"grad_norm": 2.2555306713911705e-05, |
|
"learning_rate": 6.25904808120021e-08, |
|
"loss": 0.0, |
|
"step": 36000 |
|
}, |
|
{ |
|
"epoch": 4.379108257075489, |
|
"grad_norm": 3.531112088239752e-05, |
|
"learning_rate": 6.245568630622615e-08, |
|
"loss": 0.0, |
|
"step": 36100 |
|
}, |
|
{ |
|
"epoch": 4.391238750862402, |
|
"grad_norm": 7.099560025380924e-05, |
|
"learning_rate": 6.232089180045022e-08, |
|
"loss": 0.0, |
|
"step": 36200 |
|
}, |
|
{ |
|
"epoch": 4.403369244649315, |
|
"grad_norm": 0.00011959305265918374, |
|
"learning_rate": 6.218609729467427e-08, |
|
"loss": 0.0, |
|
"step": 36300 |
|
}, |
|
{ |
|
"epoch": 4.415499738436227, |
|
"grad_norm": 4.892437209491618e-05, |
|
"learning_rate": 6.205130278889832e-08, |
|
"loss": 0.0, |
|
"step": 36400 |
|
}, |
|
{ |
|
"epoch": 4.427630232223141, |
|
"grad_norm": 1.5868727132328786e-05, |
|
"learning_rate": 6.191650828312238e-08, |
|
"loss": 0.0, |
|
"step": 36500 |
|
}, |
|
{ |
|
"epoch": 4.439760726010054, |
|
"grad_norm": 4.7836245357757434e-05, |
|
"learning_rate": 6.178171377734643e-08, |
|
"loss": 0.0, |
|
"step": 36600 |
|
}, |
|
{ |
|
"epoch": 4.451891219796966, |
|
"grad_norm": 9.044109901878983e-05, |
|
"learning_rate": 6.164691927157048e-08, |
|
"loss": 0.0, |
|
"step": 36700 |
|
}, |
|
{ |
|
"epoch": 4.464021713583879, |
|
"grad_norm": 7.068177364999428e-05, |
|
"learning_rate": 6.151212476579455e-08, |
|
"loss": 0.0, |
|
"step": 36800 |
|
}, |
|
{ |
|
"epoch": 4.476152207370792, |
|
"grad_norm": 2.011969445447903e-05, |
|
"learning_rate": 6.13773302600186e-08, |
|
"loss": 0.0, |
|
"step": 36900 |
|
}, |
|
{ |
|
"epoch": 4.488282701157704, |
|
"grad_norm": 9.63186175795272e-05, |
|
"learning_rate": 6.124253575424265e-08, |
|
"loss": 0.0, |
|
"step": 37000 |
|
}, |
|
{ |
|
"epoch": 4.500413194944617, |
|
"grad_norm": 5.197514474275522e-05, |
|
"learning_rate": 6.11077412484667e-08, |
|
"loss": 0.0, |
|
"step": 37100 |
|
}, |
|
{ |
|
"epoch": 4.51254368873153, |
|
"grad_norm": 2.6003468519775197e-05, |
|
"learning_rate": 6.097294674269077e-08, |
|
"loss": 0.0, |
|
"step": 37200 |
|
}, |
|
{ |
|
"epoch": 4.524674182518442, |
|
"grad_norm": 7.684047886868939e-05, |
|
"learning_rate": 6.083815223691482e-08, |
|
"loss": 0.0, |
|
"step": 37300 |
|
}, |
|
{ |
|
"epoch": 4.536804676305355, |
|
"grad_norm": 0.00010752572416095063, |
|
"learning_rate": 6.070335773113888e-08, |
|
"loss": 0.0, |
|
"step": 37400 |
|
}, |
|
{ |
|
"epoch": 4.5489351700922676, |
|
"grad_norm": 4.4010826968587935e-05, |
|
"learning_rate": 6.056856322536293e-08, |
|
"loss": 0.0, |
|
"step": 37500 |
|
}, |
|
{ |
|
"epoch": 4.56106566387918, |
|
"grad_norm": 0.00011428508150856942, |
|
"learning_rate": 6.043376871958698e-08, |
|
"loss": 0.0, |
|
"step": 37600 |
|
}, |
|
{ |
|
"epoch": 4.573196157666093, |
|
"grad_norm": 0.00011925880244234577, |
|
"learning_rate": 6.029897421381104e-08, |
|
"loss": 0.0, |
|
"step": 37700 |
|
}, |
|
{ |
|
"epoch": 4.5853266514530056, |
|
"grad_norm": 4.866239396505989e-05, |
|
"learning_rate": 6.01641797080351e-08, |
|
"loss": 0.0, |
|
"step": 37800 |
|
}, |
|
{ |
|
"epoch": 4.597457145239918, |
|
"grad_norm": 4.699817145592533e-05, |
|
"learning_rate": 6.002938520225915e-08, |
|
"loss": 0.0, |
|
"step": 37900 |
|
}, |
|
{ |
|
"epoch": 4.609587639026831, |
|
"grad_norm": 3.538289820426144e-05, |
|
"learning_rate": 5.989459069648321e-08, |
|
"loss": 0.0, |
|
"step": 38000 |
|
}, |
|
{ |
|
"epoch": 4.6217181328137436, |
|
"grad_norm": 0.0001686308823991567, |
|
"learning_rate": 5.975979619070727e-08, |
|
"loss": 0.0, |
|
"step": 38100 |
|
}, |
|
{ |
|
"epoch": 4.633848626600656, |
|
"grad_norm": 3.119569373666309e-05, |
|
"learning_rate": 5.962500168493133e-08, |
|
"loss": 0.0, |
|
"step": 38200 |
|
}, |
|
{ |
|
"epoch": 4.645979120387569, |
|
"grad_norm": 1.2374849575280678e-05, |
|
"learning_rate": 5.949020717915537e-08, |
|
"loss": 0.0, |
|
"step": 38300 |
|
}, |
|
{ |
|
"epoch": 4.6581096141744815, |
|
"grad_norm": 1.740476545819547e-05, |
|
"learning_rate": 5.935541267337944e-08, |
|
"loss": 0.0, |
|
"step": 38400 |
|
}, |
|
{ |
|
"epoch": 4.670240107961394, |
|
"grad_norm": 2.1879846826777793e-05, |
|
"learning_rate": 5.9220618167603483e-08, |
|
"loss": 0.0, |
|
"step": 38500 |
|
}, |
|
{ |
|
"epoch": 4.682370601748308, |
|
"grad_norm": 6.039372965460643e-05, |
|
"learning_rate": 5.9085823661827536e-08, |
|
"loss": 0.0, |
|
"step": 38600 |
|
}, |
|
{ |
|
"epoch": 4.69450109553522, |
|
"grad_norm": 3.6898843973176554e-05, |
|
"learning_rate": 5.89510291560516e-08, |
|
"loss": 0.0, |
|
"step": 38700 |
|
}, |
|
{ |
|
"epoch": 4.706631589322133, |
|
"grad_norm": 3.252164970035665e-05, |
|
"learning_rate": 5.8816234650275655e-08, |
|
"loss": 0.0, |
|
"step": 38800 |
|
}, |
|
{ |
|
"epoch": 4.718762083109046, |
|
"grad_norm": 8.90696537680924e-06, |
|
"learning_rate": 5.868144014449971e-08, |
|
"loss": 0.0, |
|
"step": 38900 |
|
}, |
|
{ |
|
"epoch": 4.730892576895958, |
|
"grad_norm": 2.7334457627148367e-05, |
|
"learning_rate": 5.8546645638723766e-08, |
|
"loss": 0.0, |
|
"step": 39000 |
|
}, |
|
{ |
|
"epoch": 4.743023070682871, |
|
"grad_norm": 8.614475518697873e-06, |
|
"learning_rate": 5.841185113294782e-08, |
|
"loss": 0.0, |
|
"step": 39100 |
|
}, |
|
{ |
|
"epoch": 4.755153564469784, |
|
"grad_norm": 1.1286195331194904e-05, |
|
"learning_rate": 5.827705662717187e-08, |
|
"loss": 0.0, |
|
"step": 39200 |
|
}, |
|
{ |
|
"epoch": 4.767284058256696, |
|
"grad_norm": 1.4409168215934187e-05, |
|
"learning_rate": 5.8142262121395924e-08, |
|
"loss": 0.0, |
|
"step": 39300 |
|
}, |
|
{ |
|
"epoch": 4.779414552043609, |
|
"grad_norm": 8.718334720470011e-05, |
|
"learning_rate": 5.800746761561999e-08, |
|
"loss": 0.0, |
|
"step": 39400 |
|
}, |
|
{ |
|
"epoch": 4.791545045830522, |
|
"grad_norm": 3.0478166081593372e-05, |
|
"learning_rate": 5.7872673109844036e-08, |
|
"loss": 0.0, |
|
"step": 39500 |
|
}, |
|
{ |
|
"epoch": 4.803675539617434, |
|
"grad_norm": 4.2255876905983314e-05, |
|
"learning_rate": 5.773787860406809e-08, |
|
"loss": 0.0, |
|
"step": 39600 |
|
}, |
|
{ |
|
"epoch": 4.815806033404347, |
|
"grad_norm": 4.204777360428125e-05, |
|
"learning_rate": 5.7603084098292155e-08, |
|
"loss": 0.0, |
|
"step": 39700 |
|
}, |
|
{ |
|
"epoch": 4.82793652719126, |
|
"grad_norm": 4.219416223349981e-05, |
|
"learning_rate": 5.746828959251621e-08, |
|
"loss": 0.0, |
|
"step": 39800 |
|
}, |
|
{ |
|
"epoch": 4.840067020978172, |
|
"grad_norm": 5.802089435746893e-05, |
|
"learning_rate": 5.733349508674026e-08, |
|
"loss": 0.0, |
|
"step": 39900 |
|
}, |
|
{ |
|
"epoch": 4.852197514765085, |
|
"grad_norm": 7.914522575447336e-05, |
|
"learning_rate": 5.719870058096432e-08, |
|
"loss": 0.0, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.852197514765085, |
|
"eval_loss": 2.0435865621948324e-07, |
|
"eval_runtime": 12761.113, |
|
"eval_samples_per_second": 33.075, |
|
"eval_steps_per_second": 4.134, |
|
"step": 40000 |
|
}, |
|
{ |
|
"epoch": 4.864328008551998, |
|
"grad_norm": 4.935232936986722e-05, |
|
"learning_rate": 5.706390607518837e-08, |
|
"loss": 0.0, |
|
"step": 40100 |
|
}, |
|
{ |
|
"epoch": 4.876458502338911, |
|
"grad_norm": 7.840626494726166e-05, |
|
"learning_rate": 5.6929111569412425e-08, |
|
"loss": 0.0, |
|
"step": 40200 |
|
}, |
|
{ |
|
"epoch": 4.888588996125824, |
|
"grad_norm": 1.2734864867525175e-05, |
|
"learning_rate": 5.679431706363649e-08, |
|
"loss": 0.0, |
|
"step": 40300 |
|
}, |
|
{ |
|
"epoch": 4.900719489912737, |
|
"grad_norm": 4.8442419938510284e-05, |
|
"learning_rate": 5.665952255786054e-08, |
|
"loss": 0.0, |
|
"step": 40400 |
|
}, |
|
{ |
|
"epoch": 4.912849983699649, |
|
"grad_norm": 6.906664930284023e-05, |
|
"learning_rate": 5.652472805208459e-08, |
|
"loss": 0.0, |
|
"step": 40500 |
|
}, |
|
{ |
|
"epoch": 4.924980477486562, |
|
"grad_norm": 5.0475380703574046e-05, |
|
"learning_rate": 5.6389933546308655e-08, |
|
"loss": 0.0, |
|
"step": 40600 |
|
}, |
|
{ |
|
"epoch": 4.937110971273475, |
|
"grad_norm": 1.4410921721719205e-05, |
|
"learning_rate": 5.625513904053271e-08, |
|
"loss": 0.0, |
|
"step": 40700 |
|
}, |
|
{ |
|
"epoch": 4.949241465060387, |
|
"grad_norm": 4.081324368598871e-05, |
|
"learning_rate": 5.612034453475676e-08, |
|
"loss": 0.0, |
|
"step": 40800 |
|
}, |
|
{ |
|
"epoch": 4.9613719588473, |
|
"grad_norm": 7.731416189926676e-06, |
|
"learning_rate": 5.598555002898082e-08, |
|
"loss": 0.0, |
|
"step": 40900 |
|
}, |
|
{ |
|
"epoch": 4.973502452634213, |
|
"grad_norm": 1.6508680346305482e-05, |
|
"learning_rate": 5.585075552320487e-08, |
|
"loss": 0.0, |
|
"step": 41000 |
|
}, |
|
{ |
|
"epoch": 4.985632946421125, |
|
"grad_norm": 3.791180643020198e-05, |
|
"learning_rate": 5.5715961017428925e-08, |
|
"loss": 0.0, |
|
"step": 41100 |
|
}, |
|
{ |
|
"epoch": 4.997763440208038, |
|
"grad_norm": 2.726632374105975e-05, |
|
"learning_rate": 5.558116651165298e-08, |
|
"loss": 0.0, |
|
"step": 41200 |
|
}, |
|
{ |
|
"epoch": 5.009893933994951, |
|
"grad_norm": 0.0001511267473688349, |
|
"learning_rate": 5.544637200587704e-08, |
|
"loss": 0.0, |
|
"step": 41300 |
|
}, |
|
{ |
|
"epoch": 5.022024427781863, |
|
"grad_norm": 1.2235775102453772e-05, |
|
"learning_rate": 5.531157750010109e-08, |
|
"loss": 0.0, |
|
"step": 41400 |
|
}, |
|
{ |
|
"epoch": 5.034154921568776, |
|
"grad_norm": 0.00012380690895952284, |
|
"learning_rate": 5.517678299432514e-08, |
|
"loss": 0.0, |
|
"step": 41500 |
|
}, |
|
{ |
|
"epoch": 5.046285415355689, |
|
"grad_norm": 2.332998155907262e-05, |
|
"learning_rate": 5.504198848854921e-08, |
|
"loss": 0.0, |
|
"step": 41600 |
|
}, |
|
{ |
|
"epoch": 5.058415909142601, |
|
"grad_norm": 0.00012525348574854434, |
|
"learning_rate": 5.490719398277326e-08, |
|
"loss": 0.0, |
|
"step": 41700 |
|
}, |
|
{ |
|
"epoch": 5.070546402929514, |
|
"grad_norm": 2.9540859031840228e-05, |
|
"learning_rate": 5.4772399476997314e-08, |
|
"loss": 0.0, |
|
"step": 41800 |
|
}, |
|
{ |
|
"epoch": 5.082676896716427, |
|
"grad_norm": 5.814078758703545e-05, |
|
"learning_rate": 5.463760497122137e-08, |
|
"loss": 0.0, |
|
"step": 41900 |
|
}, |
|
{ |
|
"epoch": 5.094807390503339, |
|
"grad_norm": 0.00012713873002212495, |
|
"learning_rate": 5.4502810465445426e-08, |
|
"loss": 0.0, |
|
"step": 42000 |
|
}, |
|
{ |
|
"epoch": 5.106937884290252, |
|
"grad_norm": 0.0001255370443686843, |
|
"learning_rate": 5.436801595966948e-08, |
|
"loss": 0.0, |
|
"step": 42100 |
|
}, |
|
{ |
|
"epoch": 5.1190683780771655, |
|
"grad_norm": 0.00012677146878559142, |
|
"learning_rate": 5.423322145389354e-08, |
|
"loss": 0.0, |
|
"step": 42200 |
|
}, |
|
{ |
|
"epoch": 5.131198871864078, |
|
"grad_norm": 5.592630259343423e-06, |
|
"learning_rate": 5.409842694811759e-08, |
|
"loss": 0.0, |
|
"step": 42300 |
|
}, |
|
{ |
|
"epoch": 5.143329365650991, |
|
"grad_norm": 3.1847266654949635e-05, |
|
"learning_rate": 5.396363244234164e-08, |
|
"loss": 0.0, |
|
"step": 42400 |
|
}, |
|
{ |
|
"epoch": 5.1554598594379035, |
|
"grad_norm": 3.685112460516393e-05, |
|
"learning_rate": 5.382883793656571e-08, |
|
"loss": 0.0, |
|
"step": 42500 |
|
}, |
|
{ |
|
"epoch": 5.167590353224816, |
|
"grad_norm": 1.862756471382454e-05, |
|
"learning_rate": 5.369404343078976e-08, |
|
"loss": 0.0, |
|
"step": 42600 |
|
}, |
|
{ |
|
"epoch": 5.179720847011729, |
|
"grad_norm": 3.480441591818817e-05, |
|
"learning_rate": 5.3559248925013814e-08, |
|
"loss": 0.0, |
|
"step": 42700 |
|
}, |
|
{ |
|
"epoch": 5.1918513407986415, |
|
"grad_norm": 5.029854946769774e-06, |
|
"learning_rate": 5.3424454419237874e-08, |
|
"loss": 0.0, |
|
"step": 42800 |
|
}, |
|
{ |
|
"epoch": 5.203981834585554, |
|
"grad_norm": 2.7888721888302825e-05, |
|
"learning_rate": 5.3289659913461926e-08, |
|
"loss": 0.0, |
|
"step": 42900 |
|
}, |
|
{ |
|
"epoch": 5.216112328372467, |
|
"grad_norm": 1.3389450941758696e-05, |
|
"learning_rate": 5.315486540768598e-08, |
|
"loss": 0.0, |
|
"step": 43000 |
|
}, |
|
{ |
|
"epoch": 5.2282428221593795, |
|
"grad_norm": 5.3387711886898614e-06, |
|
"learning_rate": 5.302007090191004e-08, |
|
"loss": 0.0, |
|
"step": 43100 |
|
}, |
|
{ |
|
"epoch": 5.240373315946292, |
|
"grad_norm": 4.869915301242145e-06, |
|
"learning_rate": 5.288527639613409e-08, |
|
"loss": 0.0, |
|
"step": 43200 |
|
}, |
|
{ |
|
"epoch": 5.252503809733205, |
|
"grad_norm": 1.4157280020299368e-05, |
|
"learning_rate": 5.2750481890358144e-08, |
|
"loss": 0.0, |
|
"step": 43300 |
|
}, |
|
{ |
|
"epoch": 5.2646343035201175, |
|
"grad_norm": 1.0791780368890613e-05, |
|
"learning_rate": 5.2615687384582196e-08, |
|
"loss": 0.0, |
|
"step": 43400 |
|
}, |
|
{ |
|
"epoch": 5.27676479730703, |
|
"grad_norm": 3.652514351415448e-05, |
|
"learning_rate": 5.248089287880626e-08, |
|
"loss": 0.0, |
|
"step": 43500 |
|
}, |
|
{ |
|
"epoch": 5.288895291093943, |
|
"grad_norm": 4.087711567990482e-05, |
|
"learning_rate": 5.2346098373030315e-08, |
|
"loss": 0.0, |
|
"step": 43600 |
|
}, |
|
{ |
|
"epoch": 5.3010257848808555, |
|
"grad_norm": 2.1705629478674382e-05, |
|
"learning_rate": 5.221130386725436e-08, |
|
"loss": 0.0, |
|
"step": 43700 |
|
}, |
|
{ |
|
"epoch": 5.313156278667768, |
|
"grad_norm": 2.8108963306294754e-05, |
|
"learning_rate": 5.2076509361478427e-08, |
|
"loss": 0.0, |
|
"step": 43800 |
|
}, |
|
{ |
|
"epoch": 5.325286772454681, |
|
"grad_norm": 2.8635831768042408e-05, |
|
"learning_rate": 5.194171485570248e-08, |
|
"loss": 0.0, |
|
"step": 43900 |
|
}, |
|
{ |
|
"epoch": 5.337417266241594, |
|
"grad_norm": 3.3284202800132334e-05, |
|
"learning_rate": 5.180692034992653e-08, |
|
"loss": 0.0, |
|
"step": 44000 |
|
}, |
|
{ |
|
"epoch": 5.349547760028507, |
|
"grad_norm": 2.190342274843715e-05, |
|
"learning_rate": 5.167212584415059e-08, |
|
"loss": 0.0, |
|
"step": 44100 |
|
}, |
|
{ |
|
"epoch": 5.36167825381542, |
|
"grad_norm": 2.5555082174832933e-05, |
|
"learning_rate": 5.1537331338374644e-08, |
|
"loss": 0.0, |
|
"step": 44200 |
|
}, |
|
{ |
|
"epoch": 5.373808747602332, |
|
"grad_norm": 3.973677667090669e-05, |
|
"learning_rate": 5.1402536832598697e-08, |
|
"loss": 0.0, |
|
"step": 44300 |
|
}, |
|
{ |
|
"epoch": 5.385939241389245, |
|
"grad_norm": 3.239759826101363e-05, |
|
"learning_rate": 5.126774232682276e-08, |
|
"loss": 0.0, |
|
"step": 44400 |
|
}, |
|
{ |
|
"epoch": 5.398069735176158, |
|
"grad_norm": 5.3550720622297376e-05, |
|
"learning_rate": 5.1132947821046815e-08, |
|
"loss": 0.0, |
|
"step": 44500 |
|
}, |
|
{ |
|
"epoch": 5.41020022896307, |
|
"grad_norm": 3.1185478292172775e-05, |
|
"learning_rate": 5.099815331527086e-08, |
|
"loss": 0.0, |
|
"step": 44600 |
|
}, |
|
{ |
|
"epoch": 5.422330722749983, |
|
"grad_norm": 5.0614133215276524e-05, |
|
"learning_rate": 5.086335880949493e-08, |
|
"loss": 0.0, |
|
"step": 44700 |
|
}, |
|
{ |
|
"epoch": 5.434461216536896, |
|
"grad_norm": 2.4577335352660157e-05, |
|
"learning_rate": 5.072856430371898e-08, |
|
"loss": 0.0, |
|
"step": 44800 |
|
}, |
|
{ |
|
"epoch": 5.446591710323808, |
|
"grad_norm": 2.079096157103777e-05, |
|
"learning_rate": 5.059376979794303e-08, |
|
"loss": 0.0, |
|
"step": 44900 |
|
}, |
|
{ |
|
"epoch": 5.458722204110721, |
|
"grad_norm": 1.3388003935688175e-05, |
|
"learning_rate": 5.045897529216709e-08, |
|
"loss": 0.0, |
|
"step": 45000 |
|
}, |
|
{ |
|
"epoch": 5.470852697897634, |
|
"grad_norm": 2.5073253709706478e-05, |
|
"learning_rate": 5.0324180786391144e-08, |
|
"loss": 0.0, |
|
"step": 45100 |
|
}, |
|
{ |
|
"epoch": 5.482983191684546, |
|
"grad_norm": 1.678628359513823e-05, |
|
"learning_rate": 5.01893862806152e-08, |
|
"loss": 0.0, |
|
"step": 45200 |
|
}, |
|
{ |
|
"epoch": 5.495113685471459, |
|
"grad_norm": 1.079649791790871e-05, |
|
"learning_rate": 5.005459177483925e-08, |
|
"loss": 0.0, |
|
"step": 45300 |
|
}, |
|
{ |
|
"epoch": 5.507244179258372, |
|
"grad_norm": 1.4951794582884759e-05, |
|
"learning_rate": 4.9919797269063316e-08, |
|
"loss": 0.0, |
|
"step": 45400 |
|
}, |
|
{ |
|
"epoch": 5.519374673045284, |
|
"grad_norm": 5.0269860366825014e-05, |
|
"learning_rate": 4.978500276328736e-08, |
|
"loss": 0.0, |
|
"step": 45500 |
|
}, |
|
{ |
|
"epoch": 5.531505166832197, |
|
"grad_norm": 2.7576521461014636e-05, |
|
"learning_rate": 4.965020825751142e-08, |
|
"loss": 0.0, |
|
"step": 45600 |
|
}, |
|
{ |
|
"epoch": 5.54363566061911, |
|
"grad_norm": 4.802513285540044e-05, |
|
"learning_rate": 4.951541375173548e-08, |
|
"loss": 0.0, |
|
"step": 45700 |
|
}, |
|
{ |
|
"epoch": 5.555766154406022, |
|
"grad_norm": 9.436444997845683e-06, |
|
"learning_rate": 4.938061924595953e-08, |
|
"loss": 0.0, |
|
"step": 45800 |
|
}, |
|
{ |
|
"epoch": 5.567896648192935, |
|
"grad_norm": 1.7118674804805778e-05, |
|
"learning_rate": 4.924582474018359e-08, |
|
"loss": 0.0, |
|
"step": 45900 |
|
}, |
|
{ |
|
"epoch": 5.580027141979848, |
|
"grad_norm": 1.7416510672774166e-05, |
|
"learning_rate": 4.911103023440764e-08, |
|
"loss": 0.0, |
|
"step": 46000 |
|
}, |
|
{ |
|
"epoch": 5.592157635766761, |
|
"grad_norm": 3.0051314752199687e-05, |
|
"learning_rate": 4.89762357286317e-08, |
|
"loss": 0.0, |
|
"step": 46100 |
|
}, |
|
{ |
|
"epoch": 5.604288129553674, |
|
"grad_norm": 7.384042419289472e-06, |
|
"learning_rate": 4.884144122285576e-08, |
|
"loss": 0.0, |
|
"step": 46200 |
|
}, |
|
{ |
|
"epoch": 5.616418623340587, |
|
"grad_norm": 3.820080746663734e-05, |
|
"learning_rate": 4.870664671707981e-08, |
|
"loss": 0.0, |
|
"step": 46300 |
|
}, |
|
{ |
|
"epoch": 5.628549117127499, |
|
"grad_norm": 1.9920646082027815e-05, |
|
"learning_rate": 4.857185221130386e-08, |
|
"loss": 0.0, |
|
"step": 46400 |
|
}, |
|
{ |
|
"epoch": 5.640679610914412, |
|
"grad_norm": 3.2449988793814555e-05, |
|
"learning_rate": 4.843705770552792e-08, |
|
"loss": 0.0, |
|
"step": 46500 |
|
}, |
|
{ |
|
"epoch": 5.652810104701325, |
|
"grad_norm": 1.5992029148037545e-05, |
|
"learning_rate": 4.8302263199751974e-08, |
|
"loss": 0.0, |
|
"step": 46600 |
|
}, |
|
{ |
|
"epoch": 5.664940598488237, |
|
"grad_norm": 8.42284680402372e-06, |
|
"learning_rate": 4.8167468693976033e-08, |
|
"loss": 0.0, |
|
"step": 46700 |
|
}, |
|
{ |
|
"epoch": 5.67707109227515, |
|
"grad_norm": 3.363145879120566e-05, |
|
"learning_rate": 4.8032674188200086e-08, |
|
"loss": 0.0, |
|
"step": 46800 |
|
}, |
|
{ |
|
"epoch": 5.689201586062063, |
|
"grad_norm": 9.66928928392008e-06, |
|
"learning_rate": 4.789787968242414e-08, |
|
"loss": 0.0, |
|
"step": 46900 |
|
}, |
|
{ |
|
"epoch": 5.701332079848975, |
|
"grad_norm": 4.893206278211437e-05, |
|
"learning_rate": 4.77630851766482e-08, |
|
"loss": 0.0, |
|
"step": 47000 |
|
}, |
|
{ |
|
"epoch": 5.713462573635888, |
|
"grad_norm": 2.201042661909014e-05, |
|
"learning_rate": 4.762829067087225e-08, |
|
"loss": 0.0, |
|
"step": 47100 |
|
}, |
|
{ |
|
"epoch": 5.725593067422801, |
|
"grad_norm": 1.98000852833502e-05, |
|
"learning_rate": 4.749349616509631e-08, |
|
"loss": 0.0, |
|
"step": 47200 |
|
}, |
|
{ |
|
"epoch": 5.737723561209713, |
|
"grad_norm": 7.69750931794988e-06, |
|
"learning_rate": 4.735870165932037e-08, |
|
"loss": 0.0, |
|
"step": 47300 |
|
}, |
|
{ |
|
"epoch": 5.749854054996626, |
|
"grad_norm": 4.621636435331311e-06, |
|
"learning_rate": 4.7223907153544415e-08, |
|
"loss": 0.0, |
|
"step": 47400 |
|
}, |
|
{ |
|
"epoch": 5.761984548783539, |
|
"grad_norm": 1.4387391274794936e-05, |
|
"learning_rate": 4.7089112647768474e-08, |
|
"loss": 0.0, |
|
"step": 47500 |
|
}, |
|
{ |
|
"epoch": 5.774115042570451, |
|
"grad_norm": 1.7973265130422078e-05, |
|
"learning_rate": 4.6954318141992534e-08, |
|
"loss": 0.0, |
|
"step": 47600 |
|
}, |
|
{ |
|
"epoch": 5.786245536357365, |
|
"grad_norm": 2.8360249416437e-05, |
|
"learning_rate": 4.6819523636216586e-08, |
|
"loss": 0.0, |
|
"step": 47700 |
|
}, |
|
{ |
|
"epoch": 5.7983760301442775, |
|
"grad_norm": 2.603951725177467e-05, |
|
"learning_rate": 4.668472913044064e-08, |
|
"loss": 0.0, |
|
"step": 47800 |
|
}, |
|
{ |
|
"epoch": 5.81050652393119, |
|
"grad_norm": 1.1267226909694728e-05, |
|
"learning_rate": 4.654993462466469e-08, |
|
"loss": 0.0, |
|
"step": 47900 |
|
}, |
|
{ |
|
"epoch": 5.822637017718103, |
|
"grad_norm": 4.544790499494411e-05, |
|
"learning_rate": 4.641514011888875e-08, |
|
"loss": 0.0, |
|
"step": 48000 |
|
}, |
|
{ |
|
"epoch": 5.8347675115050155, |
|
"grad_norm": 5.073808551969705e-06, |
|
"learning_rate": 4.628034561311281e-08, |
|
"loss": 0.0, |
|
"step": 48100 |
|
}, |
|
{ |
|
"epoch": 5.846898005291928, |
|
"grad_norm": 4.531604190560756e-06, |
|
"learning_rate": 4.614555110733686e-08, |
|
"loss": 0.0, |
|
"step": 48200 |
|
}, |
|
{ |
|
"epoch": 5.859028499078841, |
|
"grad_norm": 2.5783774617593735e-05, |
|
"learning_rate": 4.6010756601560916e-08, |
|
"loss": 0.0, |
|
"step": 48300 |
|
}, |
|
{ |
|
"epoch": 5.8711589928657535, |
|
"grad_norm": 1.8032031221082434e-05, |
|
"learning_rate": 4.5875962095784975e-08, |
|
"loss": 0.0, |
|
"step": 48400 |
|
}, |
|
{ |
|
"epoch": 5.883289486652666, |
|
"grad_norm": 2.339402271900326e-05, |
|
"learning_rate": 4.574116759000903e-08, |
|
"loss": 0.0, |
|
"step": 48500 |
|
}, |
|
{ |
|
"epoch": 5.895419980439579, |
|
"grad_norm": 1.0057786312245298e-05, |
|
"learning_rate": 4.560637308423309e-08, |
|
"loss": 0.0, |
|
"step": 48600 |
|
}, |
|
{ |
|
"epoch": 5.9075504742264915, |
|
"grad_norm": 1.81854484253563e-05, |
|
"learning_rate": 4.547157857845714e-08, |
|
"loss": 0.0, |
|
"step": 48700 |
|
}, |
|
{ |
|
"epoch": 5.919680968013404, |
|
"grad_norm": 1.9178327420377173e-05, |
|
"learning_rate": 4.533678407268119e-08, |
|
"loss": 0.0, |
|
"step": 48800 |
|
}, |
|
{ |
|
"epoch": 5.931811461800317, |
|
"grad_norm": 3.179845953127369e-05, |
|
"learning_rate": 4.520198956690525e-08, |
|
"loss": 0.0, |
|
"step": 48900 |
|
}, |
|
{ |
|
"epoch": 5.9439419555872295, |
|
"grad_norm": 2.309592491656076e-05, |
|
"learning_rate": 4.5067195061129304e-08, |
|
"loss": 0.0, |
|
"step": 49000 |
|
}, |
|
{ |
|
"epoch": 5.956072449374142, |
|
"grad_norm": 1.0060489330498967e-05, |
|
"learning_rate": 4.4932400555353363e-08, |
|
"loss": 0.0, |
|
"step": 49100 |
|
}, |
|
{ |
|
"epoch": 5.968202943161055, |
|
"grad_norm": 2.0553434296743944e-05, |
|
"learning_rate": 4.4797606049577416e-08, |
|
"loss": 0.0, |
|
"step": 49200 |
|
}, |
|
{ |
|
"epoch": 5.9803334369479675, |
|
"grad_norm": 3.8277423300314695e-05, |
|
"learning_rate": 4.466281154380147e-08, |
|
"loss": 0.0, |
|
"step": 49300 |
|
}, |
|
{ |
|
"epoch": 5.99246393073488, |
|
"grad_norm": 6.001651854603551e-06, |
|
"learning_rate": 4.452801703802553e-08, |
|
"loss": 0.0, |
|
"step": 49400 |
|
}, |
|
{ |
|
"epoch": 6.004594424521793, |
|
"grad_norm": 1.8310502127860673e-05, |
|
"learning_rate": 4.4394570477307344e-08, |
|
"loss": 0.0, |
|
"step": 49500 |
|
}, |
|
{ |
|
"epoch": 6.0167249183087055, |
|
"grad_norm": 2.0332221538410522e-05, |
|
"learning_rate": 4.4259775971531397e-08, |
|
"loss": 0.0, |
|
"step": 49600 |
|
}, |
|
{ |
|
"epoch": 6.028855412095619, |
|
"grad_norm": 5.2164625230943784e-05, |
|
"learning_rate": 4.4124981465755456e-08, |
|
"loss": 0.0, |
|
"step": 49700 |
|
}, |
|
{ |
|
"epoch": 6.040985905882532, |
|
"grad_norm": 3.2575280783930793e-05, |
|
"learning_rate": 4.399018695997951e-08, |
|
"loss": 0.0, |
|
"step": 49800 |
|
}, |
|
{ |
|
"epoch": 6.053116399669444, |
|
"grad_norm": 3.4000044252024963e-05, |
|
"learning_rate": 4.385539245420357e-08, |
|
"loss": 0.0, |
|
"step": 49900 |
|
}, |
|
{ |
|
"epoch": 6.065246893456357, |
|
"grad_norm": 3.062421455979347e-05, |
|
"learning_rate": 4.372059794842762e-08, |
|
"loss": 0.0, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 6.065246893456357, |
|
"eval_loss": 6.81196823393293e-08, |
|
"eval_runtime": 12731.5813, |
|
"eval_samples_per_second": 33.152, |
|
"eval_steps_per_second": 4.144, |
|
"step": 50000 |
|
}, |
|
{ |
|
"epoch": 6.07737738724327, |
|
"grad_norm": 2.012972254306078e-05, |
|
"learning_rate": 4.358580344265167e-08, |
|
"loss": 0.0, |
|
"step": 50100 |
|
}, |
|
{ |
|
"epoch": 6.089507881030182, |
|
"grad_norm": 1.4403743080038112e-05, |
|
"learning_rate": 4.345100893687573e-08, |
|
"loss": 0.0, |
|
"step": 50200 |
|
}, |
|
{ |
|
"epoch": 6.101638374817095, |
|
"grad_norm": 3.0759158107684925e-05, |
|
"learning_rate": 4.3316214431099785e-08, |
|
"loss": 0.0, |
|
"step": 50300 |
|
}, |
|
{ |
|
"epoch": 6.113768868604008, |
|
"grad_norm": 3.788939648075029e-05, |
|
"learning_rate": 4.3181419925323844e-08, |
|
"loss": 0.0, |
|
"step": 50400 |
|
}, |
|
{ |
|
"epoch": 6.12589936239092, |
|
"grad_norm": 2.2543508748640306e-05, |
|
"learning_rate": 4.30466254195479e-08, |
|
"loss": 0.0, |
|
"step": 50500 |
|
}, |
|
{ |
|
"epoch": 6.138029856177833, |
|
"grad_norm": 1.6207653970923275e-05, |
|
"learning_rate": 4.291183091377195e-08, |
|
"loss": 0.0, |
|
"step": 50600 |
|
}, |
|
{ |
|
"epoch": 6.150160349964746, |
|
"grad_norm": 6.610866421397077e-06, |
|
"learning_rate": 4.277703640799601e-08, |
|
"loss": 0.0, |
|
"step": 50700 |
|
}, |
|
{ |
|
"epoch": 6.162290843751658, |
|
"grad_norm": 1.5873067241045646e-05, |
|
"learning_rate": 4.264224190222007e-08, |
|
"loss": 0.0, |
|
"step": 50800 |
|
}, |
|
{ |
|
"epoch": 6.174421337538571, |
|
"grad_norm": 6.631801625189837e-06, |
|
"learning_rate": 4.250744739644412e-08, |
|
"loss": 0.0, |
|
"step": 50900 |
|
}, |
|
{ |
|
"epoch": 6.186551831325484, |
|
"grad_norm": 1.6326004697475582e-05, |
|
"learning_rate": 4.2372652890668174e-08, |
|
"loss": 0.0, |
|
"step": 51000 |
|
}, |
|
{ |
|
"epoch": 6.198682325112396, |
|
"grad_norm": 1.117045758292079e-05, |
|
"learning_rate": 4.2237858384892226e-08, |
|
"loss": 0.0, |
|
"step": 51100 |
|
}, |
|
{ |
|
"epoch": 6.210812818899309, |
|
"grad_norm": 7.856343472667504e-06, |
|
"learning_rate": 4.2103063879116286e-08, |
|
"loss": 0.0, |
|
"step": 51200 |
|
}, |
|
{ |
|
"epoch": 6.222943312686222, |
|
"grad_norm": 2.9008153433096595e-05, |
|
"learning_rate": 4.1968269373340345e-08, |
|
"loss": 0.0, |
|
"step": 51300 |
|
}, |
|
{ |
|
"epoch": 6.235073806473134, |
|
"grad_norm": 5.562766091316007e-06, |
|
"learning_rate": 4.18334748675644e-08, |
|
"loss": 0.0, |
|
"step": 51400 |
|
}, |
|
{ |
|
"epoch": 6.247204300260048, |
|
"grad_norm": 6.107033186708577e-06, |
|
"learning_rate": 4.169868036178845e-08, |
|
"loss": 0.0, |
|
"step": 51500 |
|
}, |
|
{ |
|
"epoch": 6.259334794046961, |
|
"grad_norm": 7.988614015630446e-06, |
|
"learning_rate": 4.156388585601251e-08, |
|
"loss": 0.0, |
|
"step": 51600 |
|
}, |
|
{ |
|
"epoch": 6.271465287833873, |
|
"grad_norm": 2.7623522328212857e-05, |
|
"learning_rate": 4.142909135023656e-08, |
|
"loss": 0.0, |
|
"step": 51700 |
|
}, |
|
{ |
|
"epoch": 6.283595781620786, |
|
"grad_norm": 3.2249143259832636e-05, |
|
"learning_rate": 4.129429684446062e-08, |
|
"loss": 0.0, |
|
"step": 51800 |
|
}, |
|
{ |
|
"epoch": 6.295726275407699, |
|
"grad_norm": 7.830405593267642e-06, |
|
"learning_rate": 4.115950233868467e-08, |
|
"loss": 0.0, |
|
"step": 51900 |
|
}, |
|
{ |
|
"epoch": 6.307856769194611, |
|
"grad_norm": 2.494780892448034e-05, |
|
"learning_rate": 4.102470783290873e-08, |
|
"loss": 0.0, |
|
"step": 52000 |
|
}, |
|
{ |
|
"epoch": 6.319987262981524, |
|
"grad_norm": 3.023298086191062e-05, |
|
"learning_rate": 4.0889913327132786e-08, |
|
"loss": 0.0, |
|
"step": 52100 |
|
}, |
|
{ |
|
"epoch": 6.332117756768437, |
|
"grad_norm": 1.9346003682585433e-05, |
|
"learning_rate": 4.075511882135684e-08, |
|
"loss": 0.0, |
|
"step": 52200 |
|
}, |
|
{ |
|
"epoch": 6.344248250555349, |
|
"grad_norm": 2.7946347472607158e-05, |
|
"learning_rate": 4.06203243155809e-08, |
|
"loss": 0.0, |
|
"step": 52300 |
|
}, |
|
{ |
|
"epoch": 6.356378744342262, |
|
"grad_norm": 1.0151994501939043e-05, |
|
"learning_rate": 4.048552980980495e-08, |
|
"loss": 0.0, |
|
"step": 52400 |
|
}, |
|
{ |
|
"epoch": 6.368509238129175, |
|
"grad_norm": 2.9524355340981856e-05, |
|
"learning_rate": 4.0350735304029003e-08, |
|
"loss": 0.0, |
|
"step": 52500 |
|
}, |
|
{ |
|
"epoch": 6.380639731916087, |
|
"grad_norm": 6.609189313167008e-06, |
|
"learning_rate": 4.021594079825306e-08, |
|
"loss": 0.0, |
|
"step": 52600 |
|
}, |
|
{ |
|
"epoch": 6.392770225703, |
|
"grad_norm": 1.3056687748758122e-05, |
|
"learning_rate": 4.008114629247712e-08, |
|
"loss": 0.0, |
|
"step": 52700 |
|
}, |
|
{ |
|
"epoch": 6.404900719489913, |
|
"grad_norm": 4.907285529043293e-06, |
|
"learning_rate": 3.994635178670117e-08, |
|
"loss": 0.0, |
|
"step": 52800 |
|
}, |
|
{ |
|
"epoch": 6.417031213276825, |
|
"grad_norm": 1.0377465514466166e-05, |
|
"learning_rate": 3.981155728092523e-08, |
|
"loss": 0.0, |
|
"step": 52900 |
|
}, |
|
{ |
|
"epoch": 6.429161707063738, |
|
"grad_norm": 1.003842680802336e-05, |
|
"learning_rate": 3.967676277514928e-08, |
|
"loss": 0.0, |
|
"step": 53000 |
|
}, |
|
{ |
|
"epoch": 6.441292200850651, |
|
"grad_norm": 7.876193194533698e-06, |
|
"learning_rate": 3.954196826937334e-08, |
|
"loss": 0.0, |
|
"step": 53100 |
|
}, |
|
{ |
|
"epoch": 6.453422694637563, |
|
"grad_norm": 4.641383384296205e-06, |
|
"learning_rate": 3.94071737635974e-08, |
|
"loss": 0.0, |
|
"step": 53200 |
|
}, |
|
{ |
|
"epoch": 6.465553188424476, |
|
"grad_norm": 5.234006493992638e-06, |
|
"learning_rate": 3.9272379257821445e-08, |
|
"loss": 0.0, |
|
"step": 53300 |
|
}, |
|
{ |
|
"epoch": 6.477683682211389, |
|
"grad_norm": 6.524358468595892e-06, |
|
"learning_rate": 3.9137584752045504e-08, |
|
"loss": 0.0, |
|
"step": 53400 |
|
}, |
|
{ |
|
"epoch": 6.489814175998302, |
|
"grad_norm": 2.1887204638915136e-05, |
|
"learning_rate": 3.900279024626956e-08, |
|
"loss": 0.0, |
|
"step": 53500 |
|
}, |
|
{ |
|
"epoch": 6.501944669785215, |
|
"grad_norm": 8.637114660814404e-06, |
|
"learning_rate": 3.8867995740493616e-08, |
|
"loss": 0.0, |
|
"step": 53600 |
|
}, |
|
{ |
|
"epoch": 6.5140751635721275, |
|
"grad_norm": 8.20979676063871e-06, |
|
"learning_rate": 3.8733201234717675e-08, |
|
"loss": 0.0, |
|
"step": 53700 |
|
}, |
|
{ |
|
"epoch": 6.52620565735904, |
|
"grad_norm": 4.2486276470299345e-06, |
|
"learning_rate": 3.859840672894173e-08, |
|
"loss": 0.0, |
|
"step": 53800 |
|
}, |
|
{ |
|
"epoch": 6.538336151145953, |
|
"grad_norm": 1.3307449080457445e-05, |
|
"learning_rate": 3.846361222316578e-08, |
|
"loss": 0.0, |
|
"step": 53900 |
|
}, |
|
{ |
|
"epoch": 6.5504666449328655, |
|
"grad_norm": 5.706210231437581e-06, |
|
"learning_rate": 3.832881771738984e-08, |
|
"loss": 0.0, |
|
"step": 54000 |
|
}, |
|
{ |
|
"epoch": 6.562597138719778, |
|
"grad_norm": 1.1756884305214044e-05, |
|
"learning_rate": 3.819402321161389e-08, |
|
"loss": 0.0, |
|
"step": 54100 |
|
}, |
|
{ |
|
"epoch": 6.574727632506691, |
|
"grad_norm": 6.582447895198129e-06, |
|
"learning_rate": 3.8059228705837945e-08, |
|
"loss": 0.0, |
|
"step": 54200 |
|
}, |
|
{ |
|
"epoch": 6.5868581262936035, |
|
"grad_norm": 1.4298544556368142e-05, |
|
"learning_rate": 3.7924434200062004e-08, |
|
"loss": 0.0, |
|
"step": 54300 |
|
}, |
|
{ |
|
"epoch": 6.598988620080516, |
|
"grad_norm": 1.3964708159619477e-05, |
|
"learning_rate": 3.778963969428606e-08, |
|
"loss": 0.0, |
|
"step": 54400 |
|
}, |
|
{ |
|
"epoch": 6.611119113867429, |
|
"grad_norm": 9.269981092074886e-06, |
|
"learning_rate": 3.7654845188510116e-08, |
|
"loss": 0.0, |
|
"step": 54500 |
|
}, |
|
{ |
|
"epoch": 6.6232496076543415, |
|
"grad_norm": 1.1989985978289042e-05, |
|
"learning_rate": 3.7520050682734175e-08, |
|
"loss": 0.0, |
|
"step": 54600 |
|
}, |
|
{ |
|
"epoch": 6.635380101441254, |
|
"grad_norm": 1.2308242730796337e-05, |
|
"learning_rate": 3.738525617695822e-08, |
|
"loss": 0.0, |
|
"step": 54700 |
|
}, |
|
{ |
|
"epoch": 6.647510595228167, |
|
"grad_norm": 1.537953721708618e-05, |
|
"learning_rate": 3.725046167118228e-08, |
|
"loss": 0.0, |
|
"step": 54800 |
|
}, |
|
{ |
|
"epoch": 6.6596410890150795, |
|
"grad_norm": 1.1343098776706029e-05, |
|
"learning_rate": 3.711566716540634e-08, |
|
"loss": 0.0, |
|
"step": 54900 |
|
}, |
|
{ |
|
"epoch": 6.671771582801992, |
|
"grad_norm": 1.3526327165891416e-05, |
|
"learning_rate": 3.698087265963039e-08, |
|
"loss": 0.0, |
|
"step": 55000 |
|
}, |
|
{ |
|
"epoch": 6.683902076588905, |
|
"grad_norm": 2.0161125576123595e-05, |
|
"learning_rate": 3.6846078153854445e-08, |
|
"loss": 0.0, |
|
"step": 55100 |
|
}, |
|
{ |
|
"epoch": 6.696032570375818, |
|
"grad_norm": 6.1126788750698324e-06, |
|
"learning_rate": 3.67112836480785e-08, |
|
"loss": 0.0, |
|
"step": 55200 |
|
}, |
|
{ |
|
"epoch": 6.708163064162731, |
|
"grad_norm": 4.200906005280558e-06, |
|
"learning_rate": 3.657648914230256e-08, |
|
"loss": 0.0, |
|
"step": 55300 |
|
}, |
|
{ |
|
"epoch": 6.720293557949644, |
|
"grad_norm": 4.649086349672871e-06, |
|
"learning_rate": 3.6441694636526617e-08, |
|
"loss": 0.0, |
|
"step": 55400 |
|
}, |
|
{ |
|
"epoch": 6.732424051736556, |
|
"grad_norm": 8.526422789145727e-06, |
|
"learning_rate": 3.630690013075067e-08, |
|
"loss": 0.0, |
|
"step": 55500 |
|
}, |
|
{ |
|
"epoch": 6.744554545523469, |
|
"grad_norm": 7.695515705563594e-06, |
|
"learning_rate": 3.617210562497472e-08, |
|
"loss": 0.0, |
|
"step": 55600 |
|
}, |
|
{ |
|
"epoch": 6.756685039310382, |
|
"grad_norm": 7.190765245468356e-06, |
|
"learning_rate": 3.603731111919878e-08, |
|
"loss": 0.0, |
|
"step": 55700 |
|
}, |
|
{ |
|
"epoch": 6.768815533097294, |
|
"grad_norm": 1.0542355994402897e-05, |
|
"learning_rate": 3.5902516613422834e-08, |
|
"loss": 0.0, |
|
"step": 55800 |
|
}, |
|
{ |
|
"epoch": 6.780946026884207, |
|
"grad_norm": 4.001772595074726e-06, |
|
"learning_rate": 3.576772210764689e-08, |
|
"loss": 0.0, |
|
"step": 55900 |
|
}, |
|
{ |
|
"epoch": 6.79307652067112, |
|
"grad_norm": 6.5062290559581015e-06, |
|
"learning_rate": 3.5632927601870946e-08, |
|
"loss": 0.0, |
|
"step": 56000 |
|
}, |
|
{ |
|
"epoch": 6.805207014458032, |
|
"grad_norm": 4.772533429786563e-06, |
|
"learning_rate": 3.5498133096095e-08, |
|
"loss": 0.0, |
|
"step": 56100 |
|
}, |
|
{ |
|
"epoch": 6.817337508244945, |
|
"grad_norm": 1.6134656107169576e-05, |
|
"learning_rate": 3.536333859031906e-08, |
|
"loss": 0.0, |
|
"step": 56200 |
|
}, |
|
{ |
|
"epoch": 6.829468002031858, |
|
"grad_norm": 5.383319603424752e-06, |
|
"learning_rate": 3.522854408454311e-08, |
|
"loss": 0.0, |
|
"step": 56300 |
|
}, |
|
{ |
|
"epoch": 6.84159849581877, |
|
"grad_norm": 6.041369488229975e-06, |
|
"learning_rate": 3.509374957876717e-08, |
|
"loss": 0.0, |
|
"step": 56400 |
|
}, |
|
{ |
|
"epoch": 6.853728989605683, |
|
"grad_norm": 4.105105290364008e-06, |
|
"learning_rate": 3.495895507299122e-08, |
|
"loss": 0.0, |
|
"step": 56500 |
|
}, |
|
{ |
|
"epoch": 6.865859483392596, |
|
"grad_norm": 7.601636298204539e-06, |
|
"learning_rate": 3.4824160567215275e-08, |
|
"loss": 0.0, |
|
"step": 56600 |
|
}, |
|
{ |
|
"epoch": 6.877989977179508, |
|
"grad_norm": 4.2275514715583995e-06, |
|
"learning_rate": 3.4689366061439334e-08, |
|
"loss": 0.0, |
|
"step": 56700 |
|
}, |
|
{ |
|
"epoch": 6.890120470966421, |
|
"grad_norm": 4.698940756497905e-06, |
|
"learning_rate": 3.4554571555663394e-08, |
|
"loss": 0.0, |
|
"step": 56800 |
|
}, |
|
{ |
|
"epoch": 6.902250964753334, |
|
"grad_norm": 1.0010462574427947e-05, |
|
"learning_rate": 3.4419777049887446e-08, |
|
"loss": 0.0, |
|
"step": 56900 |
|
}, |
|
{ |
|
"epoch": 6.914381458540246, |
|
"grad_norm": 5.1377683121245354e-06, |
|
"learning_rate": 3.42849825441115e-08, |
|
"loss": 0.0, |
|
"step": 57000 |
|
}, |
|
{ |
|
"epoch": 6.926511952327159, |
|
"grad_norm": 1.253222580999136e-05, |
|
"learning_rate": 3.415018803833555e-08, |
|
"loss": 0.0, |
|
"step": 57100 |
|
}, |
|
{ |
|
"epoch": 6.938642446114072, |
|
"grad_norm": 1.1987819561909419e-05, |
|
"learning_rate": 3.401539353255961e-08, |
|
"loss": 0.0, |
|
"step": 57200 |
|
}, |
|
{ |
|
"epoch": 6.950772939900985, |
|
"grad_norm": 7.4161894190183375e-06, |
|
"learning_rate": 3.388059902678367e-08, |
|
"loss": 0.0, |
|
"step": 57300 |
|
}, |
|
{ |
|
"epoch": 6.962903433687898, |
|
"grad_norm": 4.78677748105838e-06, |
|
"learning_rate": 3.374580452100772e-08, |
|
"loss": 0.0, |
|
"step": 57400 |
|
}, |
|
{ |
|
"epoch": 6.9750339274748105, |
|
"grad_norm": 1.2344567949185148e-05, |
|
"learning_rate": 3.3611010015231775e-08, |
|
"loss": 0.0, |
|
"step": 57500 |
|
}, |
|
{ |
|
"epoch": 6.987164421261723, |
|
"grad_norm": 4.33678769695689e-06, |
|
"learning_rate": 3.3476215509455835e-08, |
|
"loss": 0.0, |
|
"step": 57600 |
|
}, |
|
{ |
|
"epoch": 6.999294915048636, |
|
"grad_norm": 1.138456991611747e-05, |
|
"learning_rate": 3.334142100367989e-08, |
|
"loss": 0.0, |
|
"step": 57700 |
|
}, |
|
{ |
|
"epoch": 7.0114254088355485, |
|
"grad_norm": 1.339143818768207e-05, |
|
"learning_rate": 3.3207974442961703e-08, |
|
"loss": 0.0, |
|
"step": 57800 |
|
}, |
|
{ |
|
"epoch": 7.023555902622461, |
|
"grad_norm": 7.87022327131126e-06, |
|
"learning_rate": 3.3073179937185756e-08, |
|
"loss": 0.0, |
|
"step": 57900 |
|
}, |
|
{ |
|
"epoch": 7.035686396409374, |
|
"grad_norm": 4.470041858439799e-06, |
|
"learning_rate": 3.2938385431409815e-08, |
|
"loss": 0.0, |
|
"step": 58000 |
|
}, |
|
{ |
|
"epoch": 7.0478168901962865, |
|
"grad_norm": 5.98327596890158e-06, |
|
"learning_rate": 3.280359092563387e-08, |
|
"loss": 0.0, |
|
"step": 58100 |
|
}, |
|
{ |
|
"epoch": 7.059947383983199, |
|
"grad_norm": 6.159062650112901e-06, |
|
"learning_rate": 3.266879641985793e-08, |
|
"loss": 0.0, |
|
"step": 58200 |
|
}, |
|
{ |
|
"epoch": 7.072077877770112, |
|
"grad_norm": 1.162042190117063e-05, |
|
"learning_rate": 3.253400191408198e-08, |
|
"loss": 0.0, |
|
"step": 58300 |
|
}, |
|
{ |
|
"epoch": 7.0842083715570245, |
|
"grad_norm": 8.919217179936823e-06, |
|
"learning_rate": 3.239920740830603e-08, |
|
"loss": 0.0, |
|
"step": 58400 |
|
}, |
|
{ |
|
"epoch": 7.096338865343937, |
|
"grad_norm": 4.296573024475947e-06, |
|
"learning_rate": 3.226441290253009e-08, |
|
"loss": 0.0, |
|
"step": 58500 |
|
}, |
|
{ |
|
"epoch": 7.10846935913085, |
|
"grad_norm": 3.882557848555734e-06, |
|
"learning_rate": 3.212961839675415e-08, |
|
"loss": 0.0, |
|
"step": 58600 |
|
}, |
|
{ |
|
"epoch": 7.1205998529177625, |
|
"grad_norm": 1.2772562513418961e-05, |
|
"learning_rate": 3.1994823890978204e-08, |
|
"loss": 0.0, |
|
"step": 58700 |
|
}, |
|
{ |
|
"epoch": 7.132730346704675, |
|
"grad_norm": 1.5256729057000484e-05, |
|
"learning_rate": 3.1860029385202257e-08, |
|
"loss": 0.0, |
|
"step": 58800 |
|
}, |
|
{ |
|
"epoch": 7.144860840491588, |
|
"grad_norm": 1.280256674363045e-05, |
|
"learning_rate": 3.1725234879426316e-08, |
|
"loss": 0.0, |
|
"step": 58900 |
|
}, |
|
{ |
|
"epoch": 7.156991334278501, |
|
"grad_norm": 1.1524730325618293e-05, |
|
"learning_rate": 3.159044037365037e-08, |
|
"loss": 0.0, |
|
"step": 59000 |
|
}, |
|
{ |
|
"epoch": 7.169121828065414, |
|
"grad_norm": 4.267490567144705e-06, |
|
"learning_rate": 3.145564586787443e-08, |
|
"loss": 0.0, |
|
"step": 59100 |
|
}, |
|
{ |
|
"epoch": 7.181252321852327, |
|
"grad_norm": 5.201383373787394e-06, |
|
"learning_rate": 3.132085136209848e-08, |
|
"loss": 0.0, |
|
"step": 59200 |
|
}, |
|
{ |
|
"epoch": 7.193382815639239, |
|
"grad_norm": 1.4003146134200506e-05, |
|
"learning_rate": 3.118605685632253e-08, |
|
"loss": 0.0, |
|
"step": 59300 |
|
}, |
|
{ |
|
"epoch": 7.205513309426152, |
|
"grad_norm": 1.0509905223443639e-05, |
|
"learning_rate": 3.105126235054659e-08, |
|
"loss": 0.0, |
|
"step": 59400 |
|
}, |
|
{ |
|
"epoch": 7.217643803213065, |
|
"grad_norm": 8.109111149678938e-06, |
|
"learning_rate": 3.0916467844770645e-08, |
|
"loss": 0.0, |
|
"step": 59500 |
|
}, |
|
{ |
|
"epoch": 7.229774296999977, |
|
"grad_norm": 5.669727670465363e-06, |
|
"learning_rate": 3.0781673338994704e-08, |
|
"loss": 0.0, |
|
"step": 59600 |
|
}, |
|
{ |
|
"epoch": 7.24190479078689, |
|
"grad_norm": 4.445894774107728e-06, |
|
"learning_rate": 3.064687883321876e-08, |
|
"loss": 0.0, |
|
"step": 59700 |
|
}, |
|
{ |
|
"epoch": 7.254035284573803, |
|
"grad_norm": 4.052724307257449e-06, |
|
"learning_rate": 3.051208432744281e-08, |
|
"loss": 0.0, |
|
"step": 59800 |
|
}, |
|
{ |
|
"epoch": 7.266165778360715, |
|
"grad_norm": 1.6403826521127485e-05, |
|
"learning_rate": 3.037728982166687e-08, |
|
"loss": 0.0, |
|
"step": 59900 |
|
}, |
|
{ |
|
"epoch": 7.278296272147628, |
|
"grad_norm": 3.227706656616647e-06, |
|
"learning_rate": 3.024249531589092e-08, |
|
"loss": 0.0, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 7.278296272147628, |
|
"eval_loss": 5.67662823414139e-08, |
|
"eval_runtime": 12876.5575, |
|
"eval_samples_per_second": 32.778, |
|
"eval_steps_per_second": 4.097, |
|
"step": 60000 |
|
}, |
|
{ |
|
"epoch": 7.290426765934541, |
|
"grad_norm": 5.48078151041409e-06, |
|
"learning_rate": 3.010770081011498e-08, |
|
"loss": 0.0, |
|
"step": 60100 |
|
}, |
|
{ |
|
"epoch": 7.302557259721453, |
|
"grad_norm": 6.255341304495232e-06, |
|
"learning_rate": 2.9972906304339034e-08, |
|
"loss": 0.0, |
|
"step": 60200 |
|
}, |
|
{ |
|
"epoch": 7.314687753508366, |
|
"grad_norm": 7.36140327717294e-06, |
|
"learning_rate": 2.9838111798563086e-08, |
|
"loss": 0.0, |
|
"step": 60300 |
|
}, |
|
{ |
|
"epoch": 7.326818247295279, |
|
"grad_norm": 1.1941012417082675e-05, |
|
"learning_rate": 2.9703317292787145e-08, |
|
"loss": 0.0, |
|
"step": 60400 |
|
}, |
|
{ |
|
"epoch": 7.338948741082191, |
|
"grad_norm": 1.1292297131149098e-05, |
|
"learning_rate": 2.95685227870112e-08, |
|
"loss": 0.0, |
|
"step": 60500 |
|
}, |
|
{ |
|
"epoch": 7.351079234869104, |
|
"grad_norm": 3.3545572932780487e-06, |
|
"learning_rate": 2.9433728281235254e-08, |
|
"loss": 0.0, |
|
"step": 60600 |
|
}, |
|
{ |
|
"epoch": 7.363209728656017, |
|
"grad_norm": 1.2666420843743253e-05, |
|
"learning_rate": 2.929893377545931e-08, |
|
"loss": 0.0, |
|
"step": 60700 |
|
}, |
|
{ |
|
"epoch": 7.375340222442929, |
|
"grad_norm": 1.1807159353338648e-05, |
|
"learning_rate": 2.916413926968337e-08, |
|
"loss": 0.0, |
|
"step": 60800 |
|
}, |
|
{ |
|
"epoch": 7.387470716229842, |
|
"grad_norm": 1.095084189728368e-05, |
|
"learning_rate": 2.9029344763907422e-08, |
|
"loss": 0.0, |
|
"step": 60900 |
|
}, |
|
{ |
|
"epoch": 7.399601210016756, |
|
"grad_norm": 9.25906715565361e-06, |
|
"learning_rate": 2.8894550258131478e-08, |
|
"loss": 0.0, |
|
"step": 61000 |
|
}, |
|
{ |
|
"epoch": 7.411731703803668, |
|
"grad_norm": 7.390953214780893e-06, |
|
"learning_rate": 2.875975575235553e-08, |
|
"loss": 0.0, |
|
"step": 61100 |
|
}, |
|
{ |
|
"epoch": 7.423862197590581, |
|
"grad_norm": 6.920663963683182e-06, |
|
"learning_rate": 2.8624961246579587e-08, |
|
"loss": 0.0, |
|
"step": 61200 |
|
}, |
|
{ |
|
"epoch": 7.435992691377494, |
|
"grad_norm": 7.917548828118015e-06, |
|
"learning_rate": 2.8490166740803646e-08, |
|
"loss": 0.0, |
|
"step": 61300 |
|
}, |
|
{ |
|
"epoch": 7.448123185164406, |
|
"grad_norm": 5.376084118324798e-06, |
|
"learning_rate": 2.83553722350277e-08, |
|
"loss": 0.0, |
|
"step": 61400 |
|
}, |
|
{ |
|
"epoch": 7.460253678951319, |
|
"grad_norm": 4.198305305180838e-06, |
|
"learning_rate": 2.8220577729251755e-08, |
|
"loss": 0.0, |
|
"step": 61500 |
|
}, |
|
{ |
|
"epoch": 7.472384172738232, |
|
"grad_norm": 4.916752004646696e-06, |
|
"learning_rate": 2.808578322347581e-08, |
|
"loss": 0.0, |
|
"step": 61600 |
|
}, |
|
{ |
|
"epoch": 7.484514666525144, |
|
"grad_norm": 1.2863613847002853e-05, |
|
"learning_rate": 2.7950988717699863e-08, |
|
"loss": 0.0, |
|
"step": 61700 |
|
}, |
|
{ |
|
"epoch": 7.496645160312057, |
|
"grad_norm": 1.0277097317157313e-05, |
|
"learning_rate": 2.7817542156981682e-08, |
|
"loss": 0.0, |
|
"step": 61800 |
|
}, |
|
{ |
|
"epoch": 7.50877565409897, |
|
"grad_norm": 1.250240620720433e-05, |
|
"learning_rate": 2.7682747651205735e-08, |
|
"loss": 0.0, |
|
"step": 61900 |
|
}, |
|
{ |
|
"epoch": 7.520906147885882, |
|
"grad_norm": 9.138646419160068e-06, |
|
"learning_rate": 2.754795314542979e-08, |
|
"loss": 0.0, |
|
"step": 62000 |
|
}, |
|
{ |
|
"epoch": 7.533036641672795, |
|
"grad_norm": 7.881306373747066e-06, |
|
"learning_rate": 2.7413158639653844e-08, |
|
"loss": 0.0, |
|
"step": 62100 |
|
}, |
|
{ |
|
"epoch": 7.545167135459708, |
|
"grad_norm": 8.445715138805099e-06, |
|
"learning_rate": 2.72783641338779e-08, |
|
"loss": 0.0, |
|
"step": 62200 |
|
}, |
|
{ |
|
"epoch": 7.55729762924662, |
|
"grad_norm": 3.7977299598423997e-06, |
|
"learning_rate": 2.714356962810196e-08, |
|
"loss": 0.0, |
|
"step": 62300 |
|
}, |
|
{ |
|
"epoch": 7.569428123033533, |
|
"grad_norm": 5.859428711119108e-06, |
|
"learning_rate": 2.7008775122326012e-08, |
|
"loss": 0.0, |
|
"step": 62400 |
|
}, |
|
{ |
|
"epoch": 7.581558616820446, |
|
"grad_norm": 3.878424195136176e-06, |
|
"learning_rate": 2.6873980616550068e-08, |
|
"loss": 0.0, |
|
"step": 62500 |
|
}, |
|
{ |
|
"epoch": 7.593689110607358, |
|
"grad_norm": 6.818550446041627e-06, |
|
"learning_rate": 2.6739186110774127e-08, |
|
"loss": 0.0, |
|
"step": 62600 |
|
}, |
|
{ |
|
"epoch": 7.605819604394272, |
|
"grad_norm": 5.862772013642825e-06, |
|
"learning_rate": 2.6604391604998176e-08, |
|
"loss": 0.0, |
|
"step": 62700 |
|
}, |
|
{ |
|
"epoch": 7.6179500981811845, |
|
"grad_norm": 9.112359293794725e-06, |
|
"learning_rate": 2.6469597099222236e-08, |
|
"loss": 0.0, |
|
"step": 62800 |
|
}, |
|
{ |
|
"epoch": 7.630080591968097, |
|
"grad_norm": 6.844359177193837e-06, |
|
"learning_rate": 2.633480259344629e-08, |
|
"loss": 0.0, |
|
"step": 62900 |
|
}, |
|
{ |
|
"epoch": 7.64221108575501, |
|
"grad_norm": 1.056177916325396e-05, |
|
"learning_rate": 2.6200008087670344e-08, |
|
"loss": 0.0, |
|
"step": 63000 |
|
}, |
|
{ |
|
"epoch": 7.6543415795419225, |
|
"grad_norm": 1.6292913642246276e-05, |
|
"learning_rate": 2.6065213581894403e-08, |
|
"loss": 0.0, |
|
"step": 63100 |
|
}, |
|
{ |
|
"epoch": 7.666472073328835, |
|
"grad_norm": 7.939475835883059e-06, |
|
"learning_rate": 2.5930419076118453e-08, |
|
"loss": 0.0, |
|
"step": 63200 |
|
}, |
|
{ |
|
"epoch": 7.678602567115748, |
|
"grad_norm": 1.4974369150877465e-05, |
|
"learning_rate": 2.5795624570342512e-08, |
|
"loss": 0.0, |
|
"step": 63300 |
|
}, |
|
{ |
|
"epoch": 7.6907330609026605, |
|
"grad_norm": 5.687543762178393e-06, |
|
"learning_rate": 2.5660830064566568e-08, |
|
"loss": 0.0, |
|
"step": 63400 |
|
}, |
|
{ |
|
"epoch": 7.702863554689573, |
|
"grad_norm": 8.041168257477693e-06, |
|
"learning_rate": 2.552603555879062e-08, |
|
"loss": 0.0, |
|
"step": 63500 |
|
}, |
|
{ |
|
"epoch": 7.714994048476486, |
|
"grad_norm": 9.756033250596374e-06, |
|
"learning_rate": 2.5391241053014677e-08, |
|
"loss": 0.0, |
|
"step": 63600 |
|
}, |
|
{ |
|
"epoch": 7.7271245422633985, |
|
"grad_norm": 8.254312888311688e-06, |
|
"learning_rate": 2.5256446547238736e-08, |
|
"loss": 0.0, |
|
"step": 63700 |
|
}, |
|
{ |
|
"epoch": 7.739255036050311, |
|
"grad_norm": 8.304762559419032e-06, |
|
"learning_rate": 2.512165204146279e-08, |
|
"loss": 0.0, |
|
"step": 63800 |
|
}, |
|
{ |
|
"epoch": 7.751385529837224, |
|
"grad_norm": 9.369220606458839e-06, |
|
"learning_rate": 2.4988205480744605e-08, |
|
"loss": 0.0, |
|
"step": 63900 |
|
}, |
|
{ |
|
"epoch": 7.7635160236241365, |
|
"grad_norm": 4.313039880798897e-06, |
|
"learning_rate": 2.4853410974968657e-08, |
|
"loss": 0.0, |
|
"step": 64000 |
|
}, |
|
{ |
|
"epoch": 7.775646517411049, |
|
"grad_norm": 1.0110463335877284e-05, |
|
"learning_rate": 2.4718616469192717e-08, |
|
"loss": 0.0, |
|
"step": 64100 |
|
}, |
|
{ |
|
"epoch": 7.787777011197962, |
|
"grad_norm": 5.044106728746556e-06, |
|
"learning_rate": 2.458382196341677e-08, |
|
"loss": 0.0, |
|
"step": 64200 |
|
}, |
|
{ |
|
"epoch": 7.7999075049848745, |
|
"grad_norm": 1.0012697202910203e-05, |
|
"learning_rate": 2.4449027457640825e-08, |
|
"loss": 0.0, |
|
"step": 64300 |
|
}, |
|
{ |
|
"epoch": 7.812037998771787, |
|
"grad_norm": 7.5458015089679975e-06, |
|
"learning_rate": 2.431423295186488e-08, |
|
"loss": 0.0, |
|
"step": 64400 |
|
}, |
|
{ |
|
"epoch": 7.8241684925587, |
|
"grad_norm": 6.187547569425078e-06, |
|
"learning_rate": 2.4179438446088937e-08, |
|
"loss": 0.0, |
|
"step": 64500 |
|
}, |
|
{ |
|
"epoch": 7.8362989863456125, |
|
"grad_norm": 6.914489858900197e-06, |
|
"learning_rate": 2.4044643940312993e-08, |
|
"loss": 0.0, |
|
"step": 64600 |
|
}, |
|
{ |
|
"epoch": 7.848429480132525, |
|
"grad_norm": 7.281971647898899e-06, |
|
"learning_rate": 2.3909849434537046e-08, |
|
"loss": 0.0, |
|
"step": 64700 |
|
}, |
|
{ |
|
"epoch": 7.860559973919439, |
|
"grad_norm": 1.0793314686452504e-05, |
|
"learning_rate": 2.3775054928761102e-08, |
|
"loss": 0.0, |
|
"step": 64800 |
|
}, |
|
{ |
|
"epoch": 7.872690467706351, |
|
"grad_norm": 8.61198077473091e-06, |
|
"learning_rate": 2.3640260422985158e-08, |
|
"loss": 0.0, |
|
"step": 64900 |
|
}, |
|
{ |
|
"epoch": 7.884820961493264, |
|
"grad_norm": 7.743517926428467e-06, |
|
"learning_rate": 2.3505465917209214e-08, |
|
"loss": 0.0, |
|
"step": 65000 |
|
}, |
|
{ |
|
"epoch": 7.896951455280177, |
|
"grad_norm": 6.104731710365741e-06, |
|
"learning_rate": 2.337067141143327e-08, |
|
"loss": 0.0, |
|
"step": 65100 |
|
}, |
|
{ |
|
"epoch": 7.909081949067089, |
|
"grad_norm": 1.2164388863311615e-05, |
|
"learning_rate": 2.3235876905657326e-08, |
|
"loss": 0.0, |
|
"step": 65200 |
|
}, |
|
{ |
|
"epoch": 7.921212442854002, |
|
"grad_norm": 8.761631761444733e-06, |
|
"learning_rate": 2.310108239988138e-08, |
|
"loss": 0.0, |
|
"step": 65300 |
|
}, |
|
{ |
|
"epoch": 7.933342936640915, |
|
"grad_norm": 5.0737071433104575e-06, |
|
"learning_rate": 2.2966287894105434e-08, |
|
"loss": 0.0, |
|
"step": 65400 |
|
}, |
|
{ |
|
"epoch": 7.945473430427827, |
|
"grad_norm": 1.1595148862397764e-05, |
|
"learning_rate": 2.283149338832949e-08, |
|
"loss": 0.0, |
|
"step": 65500 |
|
}, |
|
{ |
|
"epoch": 7.95760392421474, |
|
"grad_norm": 1.36161434056703e-05, |
|
"learning_rate": 2.2696698882553546e-08, |
|
"loss": 0.0, |
|
"step": 65600 |
|
}, |
|
{ |
|
"epoch": 7.969734418001653, |
|
"grad_norm": 5.490317107614828e-06, |
|
"learning_rate": 2.2561904376777602e-08, |
|
"loss": 0.0, |
|
"step": 65700 |
|
}, |
|
{ |
|
"epoch": 7.981864911788565, |
|
"grad_norm": 3.685940328068682e-06, |
|
"learning_rate": 2.2427109871001655e-08, |
|
"loss": 0.0, |
|
"step": 65800 |
|
}, |
|
{ |
|
"epoch": 7.993995405575478, |
|
"grad_norm": 3.629038928920636e-06, |
|
"learning_rate": 2.2293663310283474e-08, |
|
"loss": 0.0, |
|
"step": 65900 |
|
}, |
|
{ |
|
"epoch": 8.006125899362392, |
|
"grad_norm": 7.837747943995055e-06, |
|
"learning_rate": 2.2160216749565287e-08, |
|
"loss": 0.0, |
|
"step": 66000 |
|
}, |
|
{ |
|
"epoch": 8.018256393149304, |
|
"grad_norm": 5.70934435017989e-06, |
|
"learning_rate": 2.2025422243789343e-08, |
|
"loss": 0.0, |
|
"step": 66100 |
|
}, |
|
{ |
|
"epoch": 8.030386886936217, |
|
"grad_norm": 5.7746415222936776e-06, |
|
"learning_rate": 2.18906277380134e-08, |
|
"loss": 0.0, |
|
"step": 66200 |
|
}, |
|
{ |
|
"epoch": 8.04251738072313, |
|
"grad_norm": 3.810003818216501e-06, |
|
"learning_rate": 2.175583323223745e-08, |
|
"loss": 0.0, |
|
"step": 66300 |
|
}, |
|
{ |
|
"epoch": 8.054647874510042, |
|
"grad_norm": 1.444386816729093e-05, |
|
"learning_rate": 2.1621038726461507e-08, |
|
"loss": 0.0, |
|
"step": 66400 |
|
}, |
|
{ |
|
"epoch": 8.066778368296955, |
|
"grad_norm": 9.787572707864456e-06, |
|
"learning_rate": 2.1486244220685563e-08, |
|
"loss": 0.0, |
|
"step": 66500 |
|
}, |
|
{ |
|
"epoch": 8.078908862083868, |
|
"grad_norm": 6.40834969090065e-06, |
|
"learning_rate": 2.135144971490962e-08, |
|
"loss": 0.0, |
|
"step": 66600 |
|
}, |
|
{ |
|
"epoch": 8.09103935587078, |
|
"grad_norm": 1.2682895430771168e-05, |
|
"learning_rate": 2.1216655209133675e-08, |
|
"loss": 0.0, |
|
"step": 66700 |
|
}, |
|
{ |
|
"epoch": 8.103169849657693, |
|
"grad_norm": 1.2579374015331268e-05, |
|
"learning_rate": 2.1081860703357728e-08, |
|
"loss": 0.0, |
|
"step": 66800 |
|
}, |
|
{ |
|
"epoch": 8.115300343444606, |
|
"grad_norm": 6.404700343409786e-06, |
|
"learning_rate": 2.0947066197581787e-08, |
|
"loss": 0.0, |
|
"step": 66900 |
|
}, |
|
{ |
|
"epoch": 8.127430837231518, |
|
"grad_norm": 5.5169152801681776e-06, |
|
"learning_rate": 2.081227169180584e-08, |
|
"loss": 0.0, |
|
"step": 67000 |
|
}, |
|
{ |
|
"epoch": 8.139561331018431, |
|
"grad_norm": 8.979187441582326e-06, |
|
"learning_rate": 2.0677477186029896e-08, |
|
"loss": 0.0, |
|
"step": 67100 |
|
}, |
|
{ |
|
"epoch": 8.151691824805344, |
|
"grad_norm": 1.1302987331873737e-05, |
|
"learning_rate": 2.0542682680253952e-08, |
|
"loss": 0.0, |
|
"step": 67200 |
|
}, |
|
{ |
|
"epoch": 8.163822318592256, |
|
"grad_norm": 1.4383387679117732e-05, |
|
"learning_rate": 2.0407888174478008e-08, |
|
"loss": 0.0, |
|
"step": 67300 |
|
}, |
|
{ |
|
"epoch": 8.175952812379169, |
|
"grad_norm": 1.563242585689295e-05, |
|
"learning_rate": 2.0273093668702064e-08, |
|
"loss": 0.0, |
|
"step": 67400 |
|
}, |
|
{ |
|
"epoch": 8.188083306166082, |
|
"grad_norm": 6.9619700298062526e-06, |
|
"learning_rate": 2.0138299162926116e-08, |
|
"loss": 0.0, |
|
"step": 67500 |
|
}, |
|
{ |
|
"epoch": 8.200213799952994, |
|
"grad_norm": 1.1879750672960654e-05, |
|
"learning_rate": 2.0003504657150176e-08, |
|
"loss": 0.0, |
|
"step": 67600 |
|
}, |
|
{ |
|
"epoch": 8.212344293739907, |
|
"grad_norm": 7.085599918355001e-06, |
|
"learning_rate": 1.986871015137423e-08, |
|
"loss": 0.0, |
|
"step": 67700 |
|
}, |
|
{ |
|
"epoch": 8.22447478752682, |
|
"grad_norm": 1.2863686606578995e-05, |
|
"learning_rate": 1.9733915645598284e-08, |
|
"loss": 0.0, |
|
"step": 67800 |
|
}, |
|
{ |
|
"epoch": 8.236605281313732, |
|
"grad_norm": 9.520225830783602e-06, |
|
"learning_rate": 1.959912113982234e-08, |
|
"loss": 0.0, |
|
"step": 67900 |
|
}, |
|
{ |
|
"epoch": 8.248735775100645, |
|
"grad_norm": 1.3260582818475086e-05, |
|
"learning_rate": 1.9464326634046396e-08, |
|
"loss": 0.0, |
|
"step": 68000 |
|
}, |
|
{ |
|
"epoch": 8.260866268887558, |
|
"grad_norm": 9.987468729377724e-06, |
|
"learning_rate": 1.9329532128270452e-08, |
|
"loss": 0.0, |
|
"step": 68100 |
|
}, |
|
{ |
|
"epoch": 8.27299676267447, |
|
"grad_norm": 1.1559543054318056e-05, |
|
"learning_rate": 1.9194737622494505e-08, |
|
"loss": 0.0, |
|
"step": 68200 |
|
}, |
|
{ |
|
"epoch": 8.285127256461383, |
|
"grad_norm": 7.925504178274423e-06, |
|
"learning_rate": 1.905994311671856e-08, |
|
"loss": 0.0, |
|
"step": 68300 |
|
}, |
|
{ |
|
"epoch": 8.297257750248296, |
|
"grad_norm": 4.772101874550572e-06, |
|
"learning_rate": 1.8925148610942617e-08, |
|
"loss": 0.0, |
|
"step": 68400 |
|
}, |
|
{ |
|
"epoch": 8.309388244035208, |
|
"grad_norm": 9.931142813002225e-06, |
|
"learning_rate": 1.8790354105166673e-08, |
|
"loss": 0.0, |
|
"step": 68500 |
|
}, |
|
{ |
|
"epoch": 8.321518737822121, |
|
"grad_norm": 9.558188139635604e-06, |
|
"learning_rate": 1.8655559599390726e-08, |
|
"loss": 0.0, |
|
"step": 68600 |
|
}, |
|
{ |
|
"epoch": 8.333649231609034, |
|
"grad_norm": 5.009603682992747e-06, |
|
"learning_rate": 1.8520765093614785e-08, |
|
"loss": 0.0, |
|
"step": 68700 |
|
}, |
|
{ |
|
"epoch": 8.345779725395946, |
|
"grad_norm": 1.3090863831166644e-05, |
|
"learning_rate": 1.838597058783884e-08, |
|
"loss": 0.0, |
|
"step": 68800 |
|
}, |
|
{ |
|
"epoch": 8.357910219182859, |
|
"grad_norm": 1.2672931916313246e-05, |
|
"learning_rate": 1.8252524027120653e-08, |
|
"loss": 0.0, |
|
"step": 68900 |
|
}, |
|
{ |
|
"epoch": 8.370040712969772, |
|
"grad_norm": 1.1767956493713427e-05, |
|
"learning_rate": 1.811772952134471e-08, |
|
"loss": 0.0, |
|
"step": 69000 |
|
}, |
|
{ |
|
"epoch": 8.382171206756684, |
|
"grad_norm": 1.4513515452563297e-05, |
|
"learning_rate": 1.7982935015568765e-08, |
|
"loss": 0.0, |
|
"step": 69100 |
|
}, |
|
{ |
|
"epoch": 8.394301700543597, |
|
"grad_norm": 4.3318741518305615e-06, |
|
"learning_rate": 1.7848140509792818e-08, |
|
"loss": 0.0, |
|
"step": 69200 |
|
}, |
|
{ |
|
"epoch": 8.40643219433051, |
|
"grad_norm": 1.4126319911156315e-05, |
|
"learning_rate": 1.7713346004016874e-08, |
|
"loss": 0.0, |
|
"step": 69300 |
|
}, |
|
{ |
|
"epoch": 8.418562688117424, |
|
"grad_norm": 9.831867828324903e-06, |
|
"learning_rate": 1.757855149824093e-08, |
|
"loss": 0.0, |
|
"step": 69400 |
|
}, |
|
{ |
|
"epoch": 8.430693181904337, |
|
"grad_norm": 1.695728860795498e-05, |
|
"learning_rate": 1.7443756992464986e-08, |
|
"loss": 0.0, |
|
"step": 69500 |
|
}, |
|
{ |
|
"epoch": 8.44282367569125, |
|
"grad_norm": 5.640126801154111e-06, |
|
"learning_rate": 1.7308962486689042e-08, |
|
"loss": 0.0, |
|
"step": 69600 |
|
}, |
|
{ |
|
"epoch": 8.454954169478162, |
|
"grad_norm": 9.565230357111432e-06, |
|
"learning_rate": 1.7174167980913098e-08, |
|
"loss": 0.0, |
|
"step": 69700 |
|
}, |
|
{ |
|
"epoch": 8.467084663265075, |
|
"grad_norm": 4.890798209089553e-06, |
|
"learning_rate": 1.7039373475137154e-08, |
|
"loss": 0.0, |
|
"step": 69800 |
|
}, |
|
{ |
|
"epoch": 8.479215157051987, |
|
"grad_norm": 3.091217422479531e-06, |
|
"learning_rate": 1.6904578969361207e-08, |
|
"loss": 0.0, |
|
"step": 69900 |
|
}, |
|
{ |
|
"epoch": 8.4913456508389, |
|
"grad_norm": 3.491951474643429e-06, |
|
"learning_rate": 1.6769784463585263e-08, |
|
"loss": 0.0, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 8.4913456508389, |
|
"eval_loss": 5.676630721040965e-08, |
|
"eval_runtime": 12832.5783, |
|
"eval_samples_per_second": 32.891, |
|
"eval_steps_per_second": 4.111, |
|
"step": 70000 |
|
}, |
|
{ |
|
"epoch": 8.503476144625813, |
|
"grad_norm": 1.0302742339263204e-05, |
|
"learning_rate": 1.663498995780932e-08, |
|
"loss": 0.0, |
|
"step": 70100 |
|
}, |
|
{ |
|
"epoch": 8.515606638412725, |
|
"grad_norm": 6.76013269185205e-06, |
|
"learning_rate": 1.6500195452033375e-08, |
|
"loss": 0.0, |
|
"step": 70200 |
|
}, |
|
{ |
|
"epoch": 8.527737132199638, |
|
"grad_norm": 4.191508651274489e-06, |
|
"learning_rate": 1.636540094625743e-08, |
|
"loss": 0.0, |
|
"step": 70300 |
|
}, |
|
{ |
|
"epoch": 8.53986762598655, |
|
"grad_norm": 9.216681064572185e-06, |
|
"learning_rate": 1.6230606440481483e-08, |
|
"loss": 0.0, |
|
"step": 70400 |
|
}, |
|
{ |
|
"epoch": 8.551998119773463, |
|
"grad_norm": 1.008288290904602e-05, |
|
"learning_rate": 1.6095811934705542e-08, |
|
"loss": 0.0, |
|
"step": 70500 |
|
}, |
|
{ |
|
"epoch": 8.564128613560376, |
|
"grad_norm": 3.22926530316181e-06, |
|
"learning_rate": 1.5961017428929595e-08, |
|
"loss": 0.0, |
|
"step": 70600 |
|
}, |
|
{ |
|
"epoch": 8.576259107347289, |
|
"grad_norm": 4.7994108172133565e-06, |
|
"learning_rate": 1.582622292315365e-08, |
|
"loss": 0.0, |
|
"step": 70700 |
|
}, |
|
{ |
|
"epoch": 8.588389601134201, |
|
"grad_norm": 6.952599051146535e-06, |
|
"learning_rate": 1.5691428417377707e-08, |
|
"loss": 0.0, |
|
"step": 70800 |
|
}, |
|
{ |
|
"epoch": 8.600520094921114, |
|
"grad_norm": 3.6921103401255095e-06, |
|
"learning_rate": 1.5556633911601763e-08, |
|
"loss": 0.0, |
|
"step": 70900 |
|
}, |
|
{ |
|
"epoch": 8.612650588708027, |
|
"grad_norm": 8.551131941203494e-06, |
|
"learning_rate": 1.542183940582582e-08, |
|
"loss": 0.0, |
|
"step": 71000 |
|
}, |
|
{ |
|
"epoch": 8.62478108249494, |
|
"grad_norm": 5.6051849242066965e-06, |
|
"learning_rate": 1.5288392845107635e-08, |
|
"loss": 0.0, |
|
"step": 71100 |
|
}, |
|
{ |
|
"epoch": 8.636911576281852, |
|
"grad_norm": 4.959318630426424e-06, |
|
"learning_rate": 1.5153598339331688e-08, |
|
"loss": 0.0, |
|
"step": 71200 |
|
}, |
|
{ |
|
"epoch": 8.649042070068765, |
|
"grad_norm": 5.334877187124221e-06, |
|
"learning_rate": 1.5018803833555744e-08, |
|
"loss": 0.0, |
|
"step": 71300 |
|
}, |
|
{ |
|
"epoch": 8.661172563855677, |
|
"grad_norm": 9.519723789708223e-06, |
|
"learning_rate": 1.4884009327779798e-08, |
|
"loss": 0.0, |
|
"step": 71400 |
|
}, |
|
{ |
|
"epoch": 8.67330305764259, |
|
"grad_norm": 8.972290743258782e-06, |
|
"learning_rate": 1.4749214822003856e-08, |
|
"loss": 0.0, |
|
"step": 71500 |
|
}, |
|
{ |
|
"epoch": 8.685433551429503, |
|
"grad_norm": 9.876139301923104e-06, |
|
"learning_rate": 1.461442031622791e-08, |
|
"loss": 0.0, |
|
"step": 71600 |
|
}, |
|
{ |
|
"epoch": 8.697564045216415, |
|
"grad_norm": 5.264038918539882e-06, |
|
"learning_rate": 1.4479625810451964e-08, |
|
"loss": 0.0, |
|
"step": 71700 |
|
}, |
|
{ |
|
"epoch": 8.709694539003328, |
|
"grad_norm": 6.115679752838332e-06, |
|
"learning_rate": 1.434483130467602e-08, |
|
"loss": 0.0, |
|
"step": 71800 |
|
}, |
|
{ |
|
"epoch": 8.72182503279024, |
|
"grad_norm": 1.0554780601523817e-05, |
|
"learning_rate": 1.4210036798900076e-08, |
|
"loss": 0.0, |
|
"step": 71900 |
|
}, |
|
{ |
|
"epoch": 8.733955526577153, |
|
"grad_norm": 2.050689909083303e-05, |
|
"learning_rate": 1.4075242293124132e-08, |
|
"loss": 0.0, |
|
"step": 72000 |
|
}, |
|
{ |
|
"epoch": 8.746086020364066, |
|
"grad_norm": 4.232292212691391e-06, |
|
"learning_rate": 1.3940447787348186e-08, |
|
"loss": 0.0, |
|
"step": 72100 |
|
}, |
|
{ |
|
"epoch": 8.758216514150979, |
|
"grad_norm": 1.164754849014571e-05, |
|
"learning_rate": 1.3805653281572244e-08, |
|
"loss": 0.0, |
|
"step": 72200 |
|
}, |
|
{ |
|
"epoch": 8.770347007937891, |
|
"grad_norm": 7.335055670409929e-06, |
|
"learning_rate": 1.3670858775796298e-08, |
|
"loss": 0.0, |
|
"step": 72300 |
|
}, |
|
{ |
|
"epoch": 8.782477501724804, |
|
"grad_norm": 4.493634151003789e-06, |
|
"learning_rate": 1.3536064270020353e-08, |
|
"loss": 0.0, |
|
"step": 72400 |
|
}, |
|
{ |
|
"epoch": 8.794607995511717, |
|
"grad_norm": 6.525173830596032e-06, |
|
"learning_rate": 1.3401269764244407e-08, |
|
"loss": 0.0, |
|
"step": 72500 |
|
}, |
|
{ |
|
"epoch": 8.80673848929863, |
|
"grad_norm": 3.5145178571838187e-06, |
|
"learning_rate": 1.3266475258468465e-08, |
|
"loss": 0.0, |
|
"step": 72600 |
|
}, |
|
{ |
|
"epoch": 8.818868983085542, |
|
"grad_norm": 7.61439468988101e-06, |
|
"learning_rate": 1.313168075269252e-08, |
|
"loss": 0.0, |
|
"step": 72700 |
|
}, |
|
{ |
|
"epoch": 8.830999476872455, |
|
"grad_norm": 9.239704013452865e-06, |
|
"learning_rate": 1.2996886246916575e-08, |
|
"loss": 0.0, |
|
"step": 72800 |
|
}, |
|
{ |
|
"epoch": 8.843129970659367, |
|
"grad_norm": 5.969765879854094e-06, |
|
"learning_rate": 1.286209174114063e-08, |
|
"loss": 0.0, |
|
"step": 72900 |
|
}, |
|
{ |
|
"epoch": 8.855260464446282, |
|
"grad_norm": 1.1594494026212487e-05, |
|
"learning_rate": 1.2727297235364687e-08, |
|
"loss": 0.0, |
|
"step": 73000 |
|
}, |
|
{ |
|
"epoch": 8.867390958233194, |
|
"grad_norm": 7.431879112118622e-06, |
|
"learning_rate": 1.2592502729588741e-08, |
|
"loss": 0.0, |
|
"step": 73100 |
|
}, |
|
{ |
|
"epoch": 8.879521452020107, |
|
"grad_norm": 7.179428394010756e-06, |
|
"learning_rate": 1.2457708223812795e-08, |
|
"loss": 0.0, |
|
"step": 73200 |
|
}, |
|
{ |
|
"epoch": 8.89165194580702, |
|
"grad_norm": 7.79450510890456e-06, |
|
"learning_rate": 1.2322913718036851e-08, |
|
"loss": 0.0, |
|
"step": 73300 |
|
}, |
|
{ |
|
"epoch": 8.903782439593932, |
|
"grad_norm": 7.0768387558928225e-06, |
|
"learning_rate": 1.2188119212260909e-08, |
|
"loss": 0.0, |
|
"step": 73400 |
|
}, |
|
{ |
|
"epoch": 8.915912933380845, |
|
"grad_norm": 9.209243216901086e-06, |
|
"learning_rate": 1.2053324706484963e-08, |
|
"loss": 0.0, |
|
"step": 73500 |
|
}, |
|
{ |
|
"epoch": 8.928043427167758, |
|
"grad_norm": 6.6513057390693575e-06, |
|
"learning_rate": 1.191853020070902e-08, |
|
"loss": 0.0, |
|
"step": 73600 |
|
}, |
|
{ |
|
"epoch": 8.94017392095467, |
|
"grad_norm": 4.44849638370215e-06, |
|
"learning_rate": 1.1783735694933074e-08, |
|
"loss": 0.0, |
|
"step": 73700 |
|
}, |
|
{ |
|
"epoch": 8.952304414741583, |
|
"grad_norm": 3.2620855563436635e-06, |
|
"learning_rate": 1.164894118915713e-08, |
|
"loss": 0.0, |
|
"step": 73800 |
|
}, |
|
{ |
|
"epoch": 8.964434908528496, |
|
"grad_norm": 8.767606232140679e-06, |
|
"learning_rate": 1.1514146683381184e-08, |
|
"loss": 0.0, |
|
"step": 73900 |
|
}, |
|
{ |
|
"epoch": 8.976565402315408, |
|
"grad_norm": 4.0526033444621135e-06, |
|
"learning_rate": 1.137935217760524e-08, |
|
"loss": 0.0, |
|
"step": 74000 |
|
}, |
|
{ |
|
"epoch": 8.988695896102321, |
|
"grad_norm": 6.989345820329618e-06, |
|
"learning_rate": 1.1244557671829296e-08, |
|
"loss": 0.0, |
|
"step": 74100 |
|
}, |
|
{ |
|
"epoch": 9.000826389889234, |
|
"grad_norm": 5.908458206249634e-06, |
|
"learning_rate": 1.1109763166053352e-08, |
|
"loss": 0.0, |
|
"step": 74200 |
|
}, |
|
{ |
|
"epoch": 9.012956883676146, |
|
"grad_norm": 3.4905781376437517e-06, |
|
"learning_rate": 1.0974968660277406e-08, |
|
"loss": 0.0, |
|
"step": 74300 |
|
}, |
|
{ |
|
"epoch": 9.02508737746306, |
|
"grad_norm": 9.163719369098544e-06, |
|
"learning_rate": 1.0840174154501462e-08, |
|
"loss": 0.0, |
|
"step": 74400 |
|
}, |
|
{ |
|
"epoch": 9.037217871249972, |
|
"grad_norm": 1.190317652799422e-05, |
|
"learning_rate": 1.0705379648725517e-08, |
|
"loss": 0.0, |
|
"step": 74500 |
|
}, |
|
{ |
|
"epoch": 9.049348365036884, |
|
"grad_norm": 7.547297627752414e-06, |
|
"learning_rate": 1.0570585142949572e-08, |
|
"loss": 0.0, |
|
"step": 74600 |
|
}, |
|
{ |
|
"epoch": 9.061478858823797, |
|
"grad_norm": 1.0041652785730548e-05, |
|
"learning_rate": 1.0435790637173628e-08, |
|
"loss": 0.0, |
|
"step": 74700 |
|
}, |
|
{ |
|
"epoch": 9.07360935261071, |
|
"grad_norm": 6.380136255756952e-06, |
|
"learning_rate": 1.0300996131397684e-08, |
|
"loss": 0.0, |
|
"step": 74800 |
|
}, |
|
{ |
|
"epoch": 9.085739846397622, |
|
"grad_norm": 1.0097122867591679e-05, |
|
"learning_rate": 1.016620162562174e-08, |
|
"loss": 0.0, |
|
"step": 74900 |
|
}, |
|
{ |
|
"epoch": 9.097870340184535, |
|
"grad_norm": 7.386913239315618e-06, |
|
"learning_rate": 1.0031407119845795e-08, |
|
"loss": 0.0, |
|
"step": 75000 |
|
}, |
|
{ |
|
"epoch": 9.110000833971448, |
|
"grad_norm": 1.3795511222269852e-05, |
|
"learning_rate": 9.897960559127609e-09, |
|
"loss": 0.0, |
|
"step": 75100 |
|
}, |
|
{ |
|
"epoch": 9.12213132775836, |
|
"grad_norm": 4.901587999484036e-06, |
|
"learning_rate": 9.763166053351665e-09, |
|
"loss": 0.0, |
|
"step": 75200 |
|
}, |
|
{ |
|
"epoch": 9.134261821545273, |
|
"grad_norm": 6.113995368650649e-06, |
|
"learning_rate": 9.62837154757572e-09, |
|
"loss": 0.0, |
|
"step": 75300 |
|
}, |
|
{ |
|
"epoch": 9.146392315332186, |
|
"grad_norm": 6.835588919784641e-06, |
|
"learning_rate": 9.493577041799775e-09, |
|
"loss": 0.0, |
|
"step": 75400 |
|
}, |
|
{ |
|
"epoch": 9.158522809119098, |
|
"grad_norm": 4.5478086576622445e-06, |
|
"learning_rate": 9.358782536023831e-09, |
|
"loss": 0.0, |
|
"step": 75500 |
|
}, |
|
{ |
|
"epoch": 9.170653302906011, |
|
"grad_norm": 7.497304068238009e-06, |
|
"learning_rate": 9.223988030247887e-09, |
|
"loss": 0.0, |
|
"step": 75600 |
|
}, |
|
{ |
|
"epoch": 9.182783796692924, |
|
"grad_norm": 6.143738119135378e-06, |
|
"learning_rate": 9.089193524471942e-09, |
|
"loss": 0.0, |
|
"step": 75700 |
|
}, |
|
{ |
|
"epoch": 9.194914290479836, |
|
"grad_norm": 7.741902663838118e-06, |
|
"learning_rate": 8.954399018695998e-09, |
|
"loss": 0.0, |
|
"step": 75800 |
|
}, |
|
{ |
|
"epoch": 9.207044784266749, |
|
"grad_norm": 3.56682039637235e-06, |
|
"learning_rate": 8.819604512920053e-09, |
|
"loss": 0.0, |
|
"step": 75900 |
|
}, |
|
{ |
|
"epoch": 9.219175278053662, |
|
"grad_norm": 3.6248711694497615e-06, |
|
"learning_rate": 8.684810007144108e-09, |
|
"loss": 0.0, |
|
"step": 76000 |
|
}, |
|
{ |
|
"epoch": 9.231305771840574, |
|
"grad_norm": 8.108417205221485e-06, |
|
"learning_rate": 8.550015501368164e-09, |
|
"loss": 0.0, |
|
"step": 76100 |
|
}, |
|
{ |
|
"epoch": 9.243436265627487, |
|
"grad_norm": 4.215437002130784e-06, |
|
"learning_rate": 8.41522099559222e-09, |
|
"loss": 0.0, |
|
"step": 76200 |
|
}, |
|
{ |
|
"epoch": 9.2555667594144, |
|
"grad_norm": 3.4193940336990636e-06, |
|
"learning_rate": 8.280426489816276e-09, |
|
"loss": 0.0, |
|
"step": 76300 |
|
}, |
|
{ |
|
"epoch": 9.267697253201312, |
|
"grad_norm": 6.604200279980432e-06, |
|
"learning_rate": 8.14563198404033e-09, |
|
"loss": 0.0, |
|
"step": 76400 |
|
}, |
|
{ |
|
"epoch": 9.279827746988225, |
|
"grad_norm": 1.4923987691872753e-05, |
|
"learning_rate": 8.010837478264386e-09, |
|
"loss": 0.0, |
|
"step": 76500 |
|
}, |
|
{ |
|
"epoch": 9.291958240775138, |
|
"grad_norm": 3.4648105611267965e-06, |
|
"learning_rate": 7.87604297248844e-09, |
|
"loss": 0.0, |
|
"step": 76600 |
|
}, |
|
{ |
|
"epoch": 9.30408873456205, |
|
"grad_norm": 4.845314379053889e-06, |
|
"learning_rate": 7.741248466712496e-09, |
|
"loss": 0.0, |
|
"step": 76700 |
|
}, |
|
{ |
|
"epoch": 9.316219228348963, |
|
"grad_norm": 1.45012054417748e-05, |
|
"learning_rate": 7.60645396093655e-09, |
|
"loss": 0.0, |
|
"step": 76800 |
|
}, |
|
{ |
|
"epoch": 9.328349722135878, |
|
"grad_norm": 9.72646193986293e-06, |
|
"learning_rate": 7.471659455160607e-09, |
|
"loss": 0.0, |
|
"step": 76900 |
|
}, |
|
{ |
|
"epoch": 9.34048021592279, |
|
"grad_norm": 1.138799598265905e-05, |
|
"learning_rate": 7.336864949384662e-09, |
|
"loss": 0.0, |
|
"step": 77000 |
|
}, |
|
{ |
|
"epoch": 9.352610709709703, |
|
"grad_norm": 1.1094990441051777e-05, |
|
"learning_rate": 7.2020704436087185e-09, |
|
"loss": 0.0, |
|
"step": 77100 |
|
}, |
|
{ |
|
"epoch": 9.364741203496616, |
|
"grad_norm": 5.859043994860258e-06, |
|
"learning_rate": 7.068623882890533e-09, |
|
"loss": 0.0, |
|
"step": 77200 |
|
}, |
|
{ |
|
"epoch": 9.376871697283528, |
|
"grad_norm": 5.342010808817577e-06, |
|
"learning_rate": 6.933829377114589e-09, |
|
"loss": 0.0, |
|
"step": 77300 |
|
}, |
|
{ |
|
"epoch": 9.38900219107044, |
|
"grad_norm": 1.1032276233891025e-05, |
|
"learning_rate": 6.799034871338644e-09, |
|
"loss": 0.0, |
|
"step": 77400 |
|
}, |
|
{ |
|
"epoch": 9.401132684857354, |
|
"grad_norm": 9.584249710314907e-06, |
|
"learning_rate": 6.6642403655627e-09, |
|
"loss": 0.0, |
|
"step": 77500 |
|
}, |
|
{ |
|
"epoch": 9.413263178644266, |
|
"grad_norm": 5.221731044002809e-06, |
|
"learning_rate": 6.529445859786754e-09, |
|
"loss": 0.0, |
|
"step": 77600 |
|
}, |
|
{ |
|
"epoch": 9.425393672431179, |
|
"grad_norm": 9.893785318126902e-06, |
|
"learning_rate": 6.39465135401081e-09, |
|
"loss": 0.0, |
|
"step": 77700 |
|
}, |
|
{ |
|
"epoch": 9.437524166218092, |
|
"grad_norm": 5.504544787982013e-06, |
|
"learning_rate": 6.259856848234865e-09, |
|
"loss": 0.0, |
|
"step": 77800 |
|
}, |
|
{ |
|
"epoch": 9.449654660005004, |
|
"grad_norm": 7.359417395491619e-06, |
|
"learning_rate": 6.125062342458921e-09, |
|
"loss": 0.0, |
|
"step": 77900 |
|
}, |
|
{ |
|
"epoch": 9.461785153791917, |
|
"grad_norm": 6.077909802115755e-06, |
|
"learning_rate": 5.9902678366829765e-09, |
|
"loss": 0.0, |
|
"step": 78000 |
|
}, |
|
{ |
|
"epoch": 9.47391564757883, |
|
"grad_norm": 6.336029855447123e-06, |
|
"learning_rate": 5.855473330907032e-09, |
|
"loss": 0.0, |
|
"step": 78100 |
|
}, |
|
{ |
|
"epoch": 9.486046141365742, |
|
"grad_norm": 1.0881180060096085e-05, |
|
"learning_rate": 5.720678825131088e-09, |
|
"loss": 0.0, |
|
"step": 78200 |
|
}, |
|
{ |
|
"epoch": 9.498176635152655, |
|
"grad_norm": 4.356296358309919e-06, |
|
"learning_rate": 5.585884319355143e-09, |
|
"loss": 0.0, |
|
"step": 78300 |
|
}, |
|
{ |
|
"epoch": 9.510307128939568, |
|
"grad_norm": 6.603406745853135e-06, |
|
"learning_rate": 5.451089813579198e-09, |
|
"loss": 0.0, |
|
"step": 78400 |
|
}, |
|
{ |
|
"epoch": 9.52243762272648, |
|
"grad_norm": 6.903046141815139e-06, |
|
"learning_rate": 5.316295307803254e-09, |
|
"loss": 0.0, |
|
"step": 78500 |
|
}, |
|
{ |
|
"epoch": 9.534568116513393, |
|
"grad_norm": 3.507041128614219e-06, |
|
"learning_rate": 5.181500802027309e-09, |
|
"loss": 0.0, |
|
"step": 78600 |
|
}, |
|
{ |
|
"epoch": 9.546698610300306, |
|
"grad_norm": 3.092974566243356e-06, |
|
"learning_rate": 5.046706296251364e-09, |
|
"loss": 0.0, |
|
"step": 78700 |
|
}, |
|
{ |
|
"epoch": 9.558829104087218, |
|
"grad_norm": 1.3427334124571644e-05, |
|
"learning_rate": 4.911911790475419e-09, |
|
"loss": 0.0, |
|
"step": 78800 |
|
}, |
|
{ |
|
"epoch": 9.57095959787413, |
|
"grad_norm": 1.3484379451256245e-05, |
|
"learning_rate": 4.777117284699476e-09, |
|
"loss": 0.0, |
|
"step": 78900 |
|
}, |
|
{ |
|
"epoch": 9.583090091661044, |
|
"grad_norm": 5.258711098576896e-06, |
|
"learning_rate": 4.642322778923531e-09, |
|
"loss": 0.0, |
|
"step": 79000 |
|
}, |
|
{ |
|
"epoch": 9.595220585447956, |
|
"grad_norm": 3.5142606975568924e-06, |
|
"learning_rate": 4.5075282731475864e-09, |
|
"loss": 0.0, |
|
"step": 79100 |
|
}, |
|
{ |
|
"epoch": 9.607351079234869, |
|
"grad_norm": 6.090160241001286e-06, |
|
"learning_rate": 4.3740817124294016e-09, |
|
"loss": 0.0, |
|
"step": 79200 |
|
}, |
|
{ |
|
"epoch": 9.619481573021782, |
|
"grad_norm": 4.854014150623698e-06, |
|
"learning_rate": 4.239287206653457e-09, |
|
"loss": 0.0, |
|
"step": 79300 |
|
}, |
|
{ |
|
"epoch": 9.631612066808694, |
|
"grad_norm": 1.0469612789165694e-05, |
|
"learning_rate": 4.104492700877512e-09, |
|
"loss": 0.0, |
|
"step": 79400 |
|
}, |
|
{ |
|
"epoch": 9.643742560595607, |
|
"grad_norm": 9.023720849654637e-06, |
|
"learning_rate": 3.969698195101567e-09, |
|
"loss": 0.0, |
|
"step": 79500 |
|
}, |
|
{ |
|
"epoch": 9.65587305438252, |
|
"grad_norm": 5.221518676989945e-06, |
|
"learning_rate": 3.834903689325623e-09, |
|
"loss": 0.0, |
|
"step": 79600 |
|
}, |
|
{ |
|
"epoch": 9.668003548169432, |
|
"grad_norm": 4.774324224854354e-06, |
|
"learning_rate": 3.700109183549678e-09, |
|
"loss": 0.0, |
|
"step": 79700 |
|
}, |
|
{ |
|
"epoch": 9.680134041956345, |
|
"grad_norm": 7.310536147997482e-06, |
|
"learning_rate": 3.5653146777737337e-09, |
|
"loss": 0.0, |
|
"step": 79800 |
|
}, |
|
{ |
|
"epoch": 9.692264535743258, |
|
"grad_norm": 8.2230781117687e-06, |
|
"learning_rate": 3.430520171997789e-09, |
|
"loss": 0.0, |
|
"step": 79900 |
|
}, |
|
{ |
|
"epoch": 9.70439502953017, |
|
"grad_norm": 7.887729225330986e-06, |
|
"learning_rate": 3.295725666221845e-09, |
|
"loss": 0.0, |
|
"step": 80000 |
|
}, |
|
{ |
|
"epoch": 9.70439502953017, |
|
"eval_loss": 5.108978484713589e-08, |
|
"eval_runtime": 13226.5082, |
|
"eval_samples_per_second": 31.911, |
|
"eval_steps_per_second": 3.989, |
|
"step": 80000 |
|
} |
|
], |
|
"logging_steps": 100, |
|
"max_steps": 82430, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 10, |
|
"save_steps": 10000, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 3, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.0401765812729692e+19, |
|
"train_batch_size": 16, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|