MISHANM's picture
Upload trainer_state.json with huggingface_hub
543e30d verified
{
"best_metric": 5.108978484713589e-08,
"best_model_checkpoint": "./outputs_L3_hindi/checkpoint-80000",
"epoch": 9.70439502953017,
"eval_steps": 10000,
"global_step": 80000,
"is_hyper_param_search": false,
"is_local_process_zero": true,
"is_world_process_zero": true,
"log_history": [
{
"epoch": 0.012130493786912713,
"grad_norm": 5.738617420196533,
"learning_rate": 1.213150551983501e-09,
"loss": 2.8724,
"step": 100
},
{
"epoch": 0.024260987573825426,
"grad_norm": 5.746348857879639,
"learning_rate": 2.426301103967002e-09,
"loss": 2.8724,
"step": 200
},
{
"epoch": 0.03639148136073814,
"grad_norm": 5.743950843811035,
"learning_rate": 3.6394516559505032e-09,
"loss": 2.8724,
"step": 300
},
{
"epoch": 0.04852197514765085,
"grad_norm": 5.745969295501709,
"learning_rate": 4.852602207934004e-09,
"loss": 2.8724,
"step": 400
},
{
"epoch": 0.060652468934563566,
"grad_norm": 5.757739067077637,
"learning_rate": 6.065752759917506e-09,
"loss": 2.8724,
"step": 500
},
{
"epoch": 0.07278296272147627,
"grad_norm": 5.759668350219727,
"learning_rate": 7.2789033119010064e-09,
"loss": 2.8724,
"step": 600
},
{
"epoch": 0.084913456508389,
"grad_norm": 5.789394855499268,
"learning_rate": 8.492053863884507e-09,
"loss": 2.8724,
"step": 700
},
{
"epoch": 0.0970439502953017,
"grad_norm": 5.801012992858887,
"learning_rate": 9.705204415868008e-09,
"loss": 2.8724,
"step": 800
},
{
"epoch": 0.10917444408221443,
"grad_norm": 5.81521463394165,
"learning_rate": 1.0918354967851511e-08,
"loss": 2.8724,
"step": 900
},
{
"epoch": 0.12130493786912713,
"grad_norm": 5.836797714233398,
"learning_rate": 1.2131505519835012e-08,
"loss": 2.8724,
"step": 1000
},
{
"epoch": 0.13343543165603985,
"grad_norm": 5.866050720214844,
"learning_rate": 1.334465607181851e-08,
"loss": 2.8724,
"step": 1100
},
{
"epoch": 0.14556592544295255,
"grad_norm": 5.896239280700684,
"learning_rate": 1.4557806623802013e-08,
"loss": 2.8724,
"step": 1200
},
{
"epoch": 0.15769641922986527,
"grad_norm": 5.933716297149658,
"learning_rate": 1.5770957175785514e-08,
"loss": 2.8724,
"step": 1300
},
{
"epoch": 0.169826913016778,
"grad_norm": 5.96455717086792,
"learning_rate": 1.6984107727769014e-08,
"loss": 2.8724,
"step": 1400
},
{
"epoch": 0.1819574068036907,
"grad_norm": 5.977672100067139,
"learning_rate": 1.8197258279752517e-08,
"loss": 2.8724,
"step": 1500
},
{
"epoch": 0.1940879005906034,
"grad_norm": 6.015458106994629,
"learning_rate": 1.9410408831736016e-08,
"loss": 2.8724,
"step": 1600
},
{
"epoch": 0.20621839437751613,
"grad_norm": 6.05010986328125,
"learning_rate": 2.062355938371952e-08,
"loss": 2.8724,
"step": 1700
},
{
"epoch": 0.21834888816442885,
"grad_norm": 6.104752540588379,
"learning_rate": 2.1836709935703022e-08,
"loss": 2.8724,
"step": 1800
},
{
"epoch": 0.23047938195134154,
"grad_norm": 6.165137767791748,
"learning_rate": 2.304986048768652e-08,
"loss": 2.8724,
"step": 1900
},
{
"epoch": 0.24260987573825427,
"grad_norm": 6.208470821380615,
"learning_rate": 2.4263011039670024e-08,
"loss": 2.8724,
"step": 2000
},
{
"epoch": 0.254740369525167,
"grad_norm": 6.256989002227783,
"learning_rate": 2.5476161591653524e-08,
"loss": 2.8724,
"step": 2100
},
{
"epoch": 0.2668708633120797,
"grad_norm": 6.313167572021484,
"learning_rate": 2.668931214363702e-08,
"loss": 2.8724,
"step": 2200
},
{
"epoch": 0.27900135709899243,
"grad_norm": 6.35524845123291,
"learning_rate": 2.7902462695620526e-08,
"loss": 2.8723,
"step": 2300
},
{
"epoch": 0.2911318508859051,
"grad_norm": 6.443501949310303,
"learning_rate": 2.9115613247604026e-08,
"loss": 2.8714,
"step": 2400
},
{
"epoch": 0.3032623446728178,
"grad_norm": 6.514742374420166,
"learning_rate": 3.032876379958753e-08,
"loss": 2.8727,
"step": 2500
},
{
"epoch": 0.31539283845973054,
"grad_norm": 6.6397552490234375,
"learning_rate": 3.154191435157103e-08,
"loss": 2.8723,
"step": 2600
},
{
"epoch": 0.32752333224664326,
"grad_norm": 6.5825324058532715,
"learning_rate": 3.275506490355453e-08,
"loss": 2.8718,
"step": 2700
},
{
"epoch": 0.339653826033556,
"grad_norm": 6.749302387237549,
"learning_rate": 3.396821545553803e-08,
"loss": 2.8703,
"step": 2800
},
{
"epoch": 0.3517843198204687,
"grad_norm": 6.816134929656982,
"learning_rate": 3.518136600752153e-08,
"loss": 2.8613,
"step": 2900
},
{
"epoch": 0.3639148136073814,
"grad_norm": 7.144017219543457,
"learning_rate": 3.639451655950503e-08,
"loss": 2.8315,
"step": 3000
},
{
"epoch": 0.3760453073942941,
"grad_norm": 8.347600936889648,
"learning_rate": 3.760766711148853e-08,
"loss": 2.7179,
"step": 3100
},
{
"epoch": 0.3881758011812068,
"grad_norm": 8.95757007598877,
"learning_rate": 3.882081766347203e-08,
"loss": 2.3659,
"step": 3200
},
{
"epoch": 0.40030629496811954,
"grad_norm": 8.870607376098633,
"learning_rate": 4.003396821545554e-08,
"loss": 2.054,
"step": 3300
},
{
"epoch": 0.41243678875503226,
"grad_norm": 8.5811767578125,
"learning_rate": 4.124711876743904e-08,
"loss": 1.9066,
"step": 3400
},
{
"epoch": 0.424567282541945,
"grad_norm": 8.555498123168945,
"learning_rate": 4.246026931942254e-08,
"loss": 1.7966,
"step": 3500
},
{
"epoch": 0.4366977763288577,
"grad_norm": 8.071405410766602,
"learning_rate": 4.3673419871406044e-08,
"loss": 1.7012,
"step": 3600
},
{
"epoch": 0.4488282701157704,
"grad_norm": 7.7888875007629395,
"learning_rate": 4.488657042338954e-08,
"loss": 1.6079,
"step": 3700
},
{
"epoch": 0.4609587639026831,
"grad_norm": 7.70664119720459,
"learning_rate": 4.609972097537304e-08,
"loss": 1.5164,
"step": 3800
},
{
"epoch": 0.4730892576895958,
"grad_norm": 7.393267631530762,
"learning_rate": 4.731287152735654e-08,
"loss": 1.4265,
"step": 3900
},
{
"epoch": 0.48521975147650853,
"grad_norm": 6.967721939086914,
"learning_rate": 4.852602207934005e-08,
"loss": 1.3367,
"step": 4000
},
{
"epoch": 0.49735024526342125,
"grad_norm": 6.281134605407715,
"learning_rate": 4.973917263132355e-08,
"loss": 1.2483,
"step": 4100
},
{
"epoch": 0.509480739050334,
"grad_norm": 5.565547943115234,
"learning_rate": 5.095232318330705e-08,
"loss": 1.1588,
"step": 4200
},
{
"epoch": 0.5216112328372466,
"grad_norm": 4.586236953735352,
"learning_rate": 5.216547373529054e-08,
"loss": 1.0804,
"step": 4300
},
{
"epoch": 0.5337417266241594,
"grad_norm": 3.6855998039245605,
"learning_rate": 5.337862428727404e-08,
"loss": 1.013,
"step": 4400
},
{
"epoch": 0.5458722204110721,
"grad_norm": 3.2828409671783447,
"learning_rate": 5.459177483925755e-08,
"loss": 0.9534,
"step": 4500
},
{
"epoch": 0.5580027141979849,
"grad_norm": 3.190077066421509,
"learning_rate": 5.580492539124105e-08,
"loss": 0.8995,
"step": 4600
},
{
"epoch": 0.5701332079848975,
"grad_norm": 3.253711700439453,
"learning_rate": 5.701807594322455e-08,
"loss": 0.8491,
"step": 4700
},
{
"epoch": 0.5822637017718102,
"grad_norm": 3.392833948135376,
"learning_rate": 5.823122649520805e-08,
"loss": 0.7953,
"step": 4800
},
{
"epoch": 0.594394195558723,
"grad_norm": 3.544286012649536,
"learning_rate": 5.944437704719155e-08,
"loss": 0.7384,
"step": 4900
},
{
"epoch": 0.6065246893456356,
"grad_norm": 3.427243947982788,
"learning_rate": 6.065752759917506e-08,
"loss": 0.6794,
"step": 5000
},
{
"epoch": 0.6186551831325484,
"grad_norm": 2.903007745742798,
"learning_rate": 6.187067815115856e-08,
"loss": 0.6207,
"step": 5100
},
{
"epoch": 0.6307856769194611,
"grad_norm": 2.3621325492858887,
"learning_rate": 6.308382870314206e-08,
"loss": 0.5738,
"step": 5200
},
{
"epoch": 0.6429161707063739,
"grad_norm": 2.0431277751922607,
"learning_rate": 6.429697925512556e-08,
"loss": 0.5392,
"step": 5300
},
{
"epoch": 0.6550466644932865,
"grad_norm": 1.8731169700622559,
"learning_rate": 6.551012980710906e-08,
"loss": 0.513,
"step": 5400
},
{
"epoch": 0.6671771582801992,
"grad_norm": 1.7845033407211304,
"learning_rate": 6.672328035909256e-08,
"loss": 0.4931,
"step": 5500
},
{
"epoch": 0.679307652067112,
"grad_norm": 1.7125587463378906,
"learning_rate": 6.793643091107605e-08,
"loss": 0.4783,
"step": 5600
},
{
"epoch": 0.6914381458540246,
"grad_norm": 1.687855839729309,
"learning_rate": 6.914958146305955e-08,
"loss": 0.4678,
"step": 5700
},
{
"epoch": 0.7035686396409374,
"grad_norm": 1.6900482177734375,
"learning_rate": 7.036273201504305e-08,
"loss": 0.4585,
"step": 5800
},
{
"epoch": 0.7156991334278501,
"grad_norm": 1.7122031450271606,
"learning_rate": 7.157588256702657e-08,
"loss": 0.4531,
"step": 5900
},
{
"epoch": 0.7278296272147629,
"grad_norm": 1.7423112392425537,
"learning_rate": 7.278903311901007e-08,
"loss": 0.446,
"step": 6000
},
{
"epoch": 0.7399601210016755,
"grad_norm": 1.8058453798294067,
"learning_rate": 7.400218367099357e-08,
"loss": 0.4414,
"step": 6100
},
{
"epoch": 0.7520906147885882,
"grad_norm": 1.8512169122695923,
"learning_rate": 7.521533422297707e-08,
"loss": 0.4368,
"step": 6200
},
{
"epoch": 0.764221108575501,
"grad_norm": 1.8963505029678345,
"learning_rate": 7.642848477496056e-08,
"loss": 0.4328,
"step": 6300
},
{
"epoch": 0.7763516023624136,
"grad_norm": 1.947475552558899,
"learning_rate": 7.764163532694406e-08,
"loss": 0.4273,
"step": 6400
},
{
"epoch": 0.7884820961493264,
"grad_norm": 1.9949339628219604,
"learning_rate": 7.885478587892758e-08,
"loss": 0.4225,
"step": 6500
},
{
"epoch": 0.8006125899362391,
"grad_norm": 2.0207386016845703,
"learning_rate": 8.006793643091108e-08,
"loss": 0.4172,
"step": 6600
},
{
"epoch": 0.8127430837231518,
"grad_norm": 2.0674002170562744,
"learning_rate": 8.128108698289458e-08,
"loss": 0.4126,
"step": 6700
},
{
"epoch": 0.8248735775100645,
"grad_norm": 2.079556703567505,
"learning_rate": 8.249423753487808e-08,
"loss": 0.4069,
"step": 6800
},
{
"epoch": 0.8370040712969772,
"grad_norm": 2.115440607070923,
"learning_rate": 8.370738808686158e-08,
"loss": 0.4021,
"step": 6900
},
{
"epoch": 0.84913456508389,
"grad_norm": 2.1741063594818115,
"learning_rate": 8.492053863884507e-08,
"loss": 0.3977,
"step": 7000
},
{
"epoch": 0.8612650588708026,
"grad_norm": 2.1430447101593018,
"learning_rate": 8.613368919082857e-08,
"loss": 0.3929,
"step": 7100
},
{
"epoch": 0.8733955526577154,
"grad_norm": 2.102972984313965,
"learning_rate": 8.734683974281209e-08,
"loss": 0.3872,
"step": 7200
},
{
"epoch": 0.8855260464446281,
"grad_norm": 2.0882880687713623,
"learning_rate": 8.855999029479559e-08,
"loss": 0.3817,
"step": 7300
},
{
"epoch": 0.8976565402315408,
"grad_norm": 1.9396212100982666,
"learning_rate": 8.977314084677909e-08,
"loss": 0.3772,
"step": 7400
},
{
"epoch": 0.9097870340184535,
"grad_norm": 1.8905789852142334,
"learning_rate": 9.098629139876259e-08,
"loss": 0.3714,
"step": 7500
},
{
"epoch": 0.9219175278053662,
"grad_norm": 1.8100907802581787,
"learning_rate": 9.219944195074609e-08,
"loss": 0.3673,
"step": 7600
},
{
"epoch": 0.934048021592279,
"grad_norm": 1.6033921241760254,
"learning_rate": 9.341259250272959e-08,
"loss": 0.3621,
"step": 7700
},
{
"epoch": 0.9461785153791916,
"grad_norm": 1.5708930492401123,
"learning_rate": 9.462574305471308e-08,
"loss": 0.3572,
"step": 7800
},
{
"epoch": 0.9583090091661044,
"grad_norm": 1.5194717645645142,
"learning_rate": 9.58388936066966e-08,
"loss": 0.3529,
"step": 7900
},
{
"epoch": 0.9704395029530171,
"grad_norm": 1.5393712520599365,
"learning_rate": 9.70520441586801e-08,
"loss": 0.3479,
"step": 8000
},
{
"epoch": 0.9825699967399298,
"grad_norm": 1.4730511903762817,
"learning_rate": 9.82651947106636e-08,
"loss": 0.3436,
"step": 8100
},
{
"epoch": 0.9947004905268425,
"grad_norm": 1.4694753885269165,
"learning_rate": 9.94783452626471e-08,
"loss": 0.3392,
"step": 8200
},
{
"epoch": 1.0068309843137553,
"grad_norm": 1.4455537796020508,
"learning_rate": 9.992451507676548e-08,
"loss": 0.3344,
"step": 8300
},
{
"epoch": 1.018961478100668,
"grad_norm": 1.4519261121749878,
"learning_rate": 9.978972057098953e-08,
"loss": 0.3293,
"step": 8400
},
{
"epoch": 1.0310919718875806,
"grad_norm": 1.4432621002197266,
"learning_rate": 9.965492606521357e-08,
"loss": 0.3248,
"step": 8500
},
{
"epoch": 1.0432224656744933,
"grad_norm": 1.414642095565796,
"learning_rate": 9.952013155943763e-08,
"loss": 0.3201,
"step": 8600
},
{
"epoch": 1.0553529594614062,
"grad_norm": 1.382421851158142,
"learning_rate": 9.938533705366169e-08,
"loss": 0.3154,
"step": 8700
},
{
"epoch": 1.0674834532483188,
"grad_norm": 1.314692497253418,
"learning_rate": 9.925054254788574e-08,
"loss": 0.3107,
"step": 8800
},
{
"epoch": 1.0796139470352315,
"grad_norm": 1.253720998764038,
"learning_rate": 9.91157480421098e-08,
"loss": 0.3064,
"step": 8900
},
{
"epoch": 1.0917444408221442,
"grad_norm": 1.1932885646820068,
"learning_rate": 9.898095353633386e-08,
"loss": 0.3021,
"step": 9000
},
{
"epoch": 1.1038749346090568,
"grad_norm": 1.1070135831832886,
"learning_rate": 9.884615903055791e-08,
"loss": 0.2975,
"step": 9100
},
{
"epoch": 1.1160054283959697,
"grad_norm": 1.03802490234375,
"learning_rate": 9.871136452478198e-08,
"loss": 0.2931,
"step": 9200
},
{
"epoch": 1.1281359221828824,
"grad_norm": 1.0262868404388428,
"learning_rate": 9.857657001900603e-08,
"loss": 0.2892,
"step": 9300
},
{
"epoch": 1.140266415969795,
"grad_norm": 0.973850667476654,
"learning_rate": 9.844177551323008e-08,
"loss": 0.2849,
"step": 9400
},
{
"epoch": 1.1523969097567077,
"grad_norm": 0.9496479034423828,
"learning_rate": 9.830698100745413e-08,
"loss": 0.281,
"step": 9500
},
{
"epoch": 1.1645274035436204,
"grad_norm": 0.9395837187767029,
"learning_rate": 9.817218650167819e-08,
"loss": 0.2772,
"step": 9600
},
{
"epoch": 1.1766578973305333,
"grad_norm": 0.9417792558670044,
"learning_rate": 9.803739199590224e-08,
"loss": 0.2731,
"step": 9700
},
{
"epoch": 1.188788391117446,
"grad_norm": 0.9536375999450684,
"learning_rate": 9.790259749012629e-08,
"loss": 0.2691,
"step": 9800
},
{
"epoch": 1.2009188849043586,
"grad_norm": 0.9853664040565491,
"learning_rate": 9.776780298435036e-08,
"loss": 0.2652,
"step": 9900
},
{
"epoch": 1.2130493786912713,
"grad_norm": 0.9936960339546204,
"learning_rate": 9.763300847857441e-08,
"loss": 0.2611,
"step": 10000
},
{
"epoch": 1.2130493786912713,
"eval_loss": 0.25932416319847107,
"eval_runtime": 12876.8542,
"eval_samples_per_second": 32.778,
"eval_steps_per_second": 4.097,
"step": 10000
},
{
"epoch": 1.2251798724781842,
"grad_norm": 1.0140084028244019,
"learning_rate": 9.749821397279846e-08,
"loss": 0.2568,
"step": 10100
},
{
"epoch": 1.2373103662650968,
"grad_norm": 1.0273332595825195,
"learning_rate": 9.736341946702253e-08,
"loss": 0.2524,
"step": 10200
},
{
"epoch": 1.2494408600520095,
"grad_norm": 1.0327990055084229,
"learning_rate": 9.722862496124658e-08,
"loss": 0.2478,
"step": 10300
},
{
"epoch": 1.2615713538389222,
"grad_norm": 1.0590392351150513,
"learning_rate": 9.709383045547062e-08,
"loss": 0.243,
"step": 10400
},
{
"epoch": 1.2737018476258348,
"grad_norm": 1.0778623819351196,
"learning_rate": 9.695903594969469e-08,
"loss": 0.2381,
"step": 10500
},
{
"epoch": 1.2858323414127477,
"grad_norm": 1.104440450668335,
"learning_rate": 9.682424144391874e-08,
"loss": 0.2332,
"step": 10600
},
{
"epoch": 1.2979628351996604,
"grad_norm": 1.1143620014190674,
"learning_rate": 9.668944693814279e-08,
"loss": 0.228,
"step": 10700
},
{
"epoch": 1.310093328986573,
"grad_norm": 1.1427433490753174,
"learning_rate": 9.655465243236686e-08,
"loss": 0.2227,
"step": 10800
},
{
"epoch": 1.3222238227734857,
"grad_norm": 1.145898461341858,
"learning_rate": 9.641985792659091e-08,
"loss": 0.2173,
"step": 10900
},
{
"epoch": 1.3343543165603986,
"grad_norm": 1.178074598312378,
"learning_rate": 9.628506342081496e-08,
"loss": 0.2115,
"step": 11000
},
{
"epoch": 1.3464848103473113,
"grad_norm": 1.174429178237915,
"learning_rate": 9.615026891503903e-08,
"loss": 0.2058,
"step": 11100
},
{
"epoch": 1.358615304134224,
"grad_norm": 1.1728930473327637,
"learning_rate": 9.601547440926308e-08,
"loss": 0.1998,
"step": 11200
},
{
"epoch": 1.3707457979211366,
"grad_norm": 1.1560231447219849,
"learning_rate": 9.588067990348712e-08,
"loss": 0.1936,
"step": 11300
},
{
"epoch": 1.3828762917080493,
"grad_norm": 1.1736541986465454,
"learning_rate": 9.574588539771119e-08,
"loss": 0.1875,
"step": 11400
},
{
"epoch": 1.395006785494962,
"grad_norm": 1.231817603111267,
"learning_rate": 9.561109089193524e-08,
"loss": 0.1814,
"step": 11500
},
{
"epoch": 1.4071372792818748,
"grad_norm": 1.5932906866073608,
"learning_rate": 9.547629638615929e-08,
"loss": 0.1748,
"step": 11600
},
{
"epoch": 1.4192677730687875,
"grad_norm": 2.940645456314087,
"learning_rate": 9.534150188038334e-08,
"loss": 0.1673,
"step": 11700
},
{
"epoch": 1.4313982668557002,
"grad_norm": 2.8523619174957275,
"learning_rate": 9.520805531966517e-08,
"loss": 0.1566,
"step": 11800
},
{
"epoch": 1.443528760642613,
"grad_norm": 0.9756022691726685,
"learning_rate": 9.507326081388922e-08,
"loss": 0.1509,
"step": 11900
},
{
"epoch": 1.4556592544295257,
"grad_norm": 1.0255942344665527,
"learning_rate": 9.493846630811327e-08,
"loss": 0.1467,
"step": 12000
},
{
"epoch": 1.4677897482164384,
"grad_norm": 2.2742061614990234,
"learning_rate": 9.480367180233734e-08,
"loss": 0.143,
"step": 12100
},
{
"epoch": 1.479920242003351,
"grad_norm": 2.624950408935547,
"learning_rate": 9.466887729656139e-08,
"loss": 0.14,
"step": 12200
},
{
"epoch": 1.4920507357902637,
"grad_norm": 5.254027366638184,
"learning_rate": 9.453408279078544e-08,
"loss": 0.1371,
"step": 12300
},
{
"epoch": 1.5041812295771764,
"grad_norm": 2.4900190830230713,
"learning_rate": 9.439928828500951e-08,
"loss": 0.1342,
"step": 12400
},
{
"epoch": 1.516311723364089,
"grad_norm": 1.6087634563446045,
"learning_rate": 9.426449377923355e-08,
"loss": 0.1315,
"step": 12500
},
{
"epoch": 1.528442217151002,
"grad_norm": 2.786614179611206,
"learning_rate": 9.41296992734576e-08,
"loss": 0.1289,
"step": 12600
},
{
"epoch": 1.5405727109379146,
"grad_norm": 5.523770809173584,
"learning_rate": 9.399490476768165e-08,
"loss": 0.1269,
"step": 12700
},
{
"epoch": 1.5527032047248275,
"grad_norm": 7.25140380859375,
"learning_rate": 9.386011026190572e-08,
"loss": 0.1243,
"step": 12800
},
{
"epoch": 1.5648336985117401,
"grad_norm": 1.2261604070663452,
"learning_rate": 9.372531575612977e-08,
"loss": 0.1213,
"step": 12900
},
{
"epoch": 1.5769641922986528,
"grad_norm": 1.0399848222732544,
"learning_rate": 9.359052125035383e-08,
"loss": 0.1176,
"step": 13000
},
{
"epoch": 1.5890946860855655,
"grad_norm": 1.0358467102050781,
"learning_rate": 9.345572674457789e-08,
"loss": 0.1138,
"step": 13100
},
{
"epoch": 1.6012251798724781,
"grad_norm": 1.7050210237503052,
"learning_rate": 9.332093223880194e-08,
"loss": 0.1105,
"step": 13200
},
{
"epoch": 1.6133556736593908,
"grad_norm": 6.696021556854248,
"learning_rate": 9.3186137733026e-08,
"loss": 0.1078,
"step": 13300
},
{
"epoch": 1.6254861674463035,
"grad_norm": 1.208287239074707,
"learning_rate": 9.305134322725005e-08,
"loss": 0.1037,
"step": 13400
},
{
"epoch": 1.6376166612332164,
"grad_norm": 1.0631296634674072,
"learning_rate": 9.29165487214741e-08,
"loss": 0.0993,
"step": 13500
},
{
"epoch": 1.649747155020129,
"grad_norm": 1.4672938585281372,
"learning_rate": 9.278175421569816e-08,
"loss": 0.0952,
"step": 13600
},
{
"epoch": 1.6618776488070417,
"grad_norm": 1.4060345888137817,
"learning_rate": 9.264695970992222e-08,
"loss": 0.0913,
"step": 13700
},
{
"epoch": 1.6740081425939546,
"grad_norm": 7.0771026611328125,
"learning_rate": 9.251351314920403e-08,
"loss": 0.0881,
"step": 13800
},
{
"epoch": 1.6861386363808673,
"grad_norm": 3.3149595260620117,
"learning_rate": 9.237871864342808e-08,
"loss": 0.0855,
"step": 13900
},
{
"epoch": 1.69826913016778,
"grad_norm": 2.399245023727417,
"learning_rate": 9.224392413765214e-08,
"loss": 0.083,
"step": 14000
},
{
"epoch": 1.7103996239546926,
"grad_norm": 5.708673477172852,
"learning_rate": 9.21091296318762e-08,
"loss": 0.0811,
"step": 14100
},
{
"epoch": 1.7225301177416052,
"grad_norm": 2.2118566036224365,
"learning_rate": 9.197433512610025e-08,
"loss": 0.0788,
"step": 14200
},
{
"epoch": 1.734660611528518,
"grad_norm": 1.6321247816085815,
"learning_rate": 9.183954062032431e-08,
"loss": 0.0762,
"step": 14300
},
{
"epoch": 1.7467911053154306,
"grad_norm": 3.3088338375091553,
"learning_rate": 9.170474611454837e-08,
"loss": 0.0739,
"step": 14400
},
{
"epoch": 1.7589215991023435,
"grad_norm": 1.9590739011764526,
"learning_rate": 9.156995160877243e-08,
"loss": 0.071,
"step": 14500
},
{
"epoch": 1.7710520928892561,
"grad_norm": 2.8815441131591797,
"learning_rate": 9.143515710299648e-08,
"loss": 0.068,
"step": 14600
},
{
"epoch": 1.783182586676169,
"grad_norm": 2.3235983848571777,
"learning_rate": 9.130036259722053e-08,
"loss": 0.065,
"step": 14700
},
{
"epoch": 1.7953130804630817,
"grad_norm": 2.7600650787353516,
"learning_rate": 9.116556809144458e-08,
"loss": 0.0624,
"step": 14800
},
{
"epoch": 1.8074435742499944,
"grad_norm": 2.3273894786834717,
"learning_rate": 9.103077358566864e-08,
"loss": 0.0601,
"step": 14900
},
{
"epoch": 1.819574068036907,
"grad_norm": 5.992413520812988,
"learning_rate": 9.08959790798927e-08,
"loss": 0.0575,
"step": 15000
},
{
"epoch": 1.8317045618238197,
"grad_norm": 1.347684621810913,
"learning_rate": 9.076118457411675e-08,
"loss": 0.0542,
"step": 15100
},
{
"epoch": 1.8438350556107324,
"grad_norm": 0.8873547315597534,
"learning_rate": 9.062639006834081e-08,
"loss": 0.0503,
"step": 15200
},
{
"epoch": 1.855965549397645,
"grad_norm": 6.997786521911621,
"learning_rate": 9.049159556256487e-08,
"loss": 0.0468,
"step": 15300
},
{
"epoch": 1.868096043184558,
"grad_norm": 6.812012195587158,
"learning_rate": 9.035680105678893e-08,
"loss": 0.0447,
"step": 15400
},
{
"epoch": 1.8802265369714706,
"grad_norm": 6.6378350257873535,
"learning_rate": 9.022200655101298e-08,
"loss": 0.043,
"step": 15500
},
{
"epoch": 1.8923570307583835,
"grad_norm": 2.3120977878570557,
"learning_rate": 9.008721204523703e-08,
"loss": 0.0411,
"step": 15600
},
{
"epoch": 1.9044875245452961,
"grad_norm": 3.212885618209839,
"learning_rate": 8.995241753946108e-08,
"loss": 0.0394,
"step": 15700
},
{
"epoch": 1.9166180183322088,
"grad_norm": 2.6104061603546143,
"learning_rate": 8.981762303368514e-08,
"loss": 0.0379,
"step": 15800
},
{
"epoch": 1.9287485121191215,
"grad_norm": 1.1504628658294678,
"learning_rate": 8.968282852790919e-08,
"loss": 0.0357,
"step": 15900
},
{
"epoch": 1.9408790059060341,
"grad_norm": 0.8576985597610474,
"learning_rate": 8.954938196719101e-08,
"loss": 0.0334,
"step": 16000
},
{
"epoch": 1.9530094996929468,
"grad_norm": 3.4249160289764404,
"learning_rate": 8.941458746141506e-08,
"loss": 0.0309,
"step": 16100
},
{
"epoch": 1.9651399934798595,
"grad_norm": 3.397020101547241,
"learning_rate": 8.927979295563912e-08,
"loss": 0.0285,
"step": 16200
},
{
"epoch": 1.9772704872667723,
"grad_norm": 0.8086531162261963,
"learning_rate": 8.914499844986318e-08,
"loss": 0.0265,
"step": 16300
},
{
"epoch": 1.989400981053685,
"grad_norm": 0.8438703417778015,
"learning_rate": 8.901020394408724e-08,
"loss": 0.0245,
"step": 16400
},
{
"epoch": 2.001531474840598,
"grad_norm": 0.8428291082382202,
"learning_rate": 8.887540943831129e-08,
"loss": 0.0227,
"step": 16500
},
{
"epoch": 2.0136619686275106,
"grad_norm": 1.706946611404419,
"learning_rate": 8.874061493253534e-08,
"loss": 0.0212,
"step": 16600
},
{
"epoch": 2.0257924624144232,
"grad_norm": 1.5371583700180054,
"learning_rate": 8.860582042675941e-08,
"loss": 0.02,
"step": 16700
},
{
"epoch": 2.037922956201336,
"grad_norm": 1.4825010299682617,
"learning_rate": 8.847102592098346e-08,
"loss": 0.0188,
"step": 16800
},
{
"epoch": 2.0500534499882486,
"grad_norm": 0.9448016285896301,
"learning_rate": 8.833623141520751e-08,
"loss": 0.0176,
"step": 16900
},
{
"epoch": 2.0621839437751612,
"grad_norm": 1.871368169784546,
"learning_rate": 8.820143690943157e-08,
"loss": 0.0163,
"step": 17000
},
{
"epoch": 2.074314437562074,
"grad_norm": 0.7671025991439819,
"learning_rate": 8.806664240365562e-08,
"loss": 0.0153,
"step": 17100
},
{
"epoch": 2.0864449313489866,
"grad_norm": 0.6203155517578125,
"learning_rate": 8.793184789787967e-08,
"loss": 0.0142,
"step": 17200
},
{
"epoch": 2.0985754251358992,
"grad_norm": 0.5058071613311768,
"learning_rate": 8.779705339210374e-08,
"loss": 0.0132,
"step": 17300
},
{
"epoch": 2.1107059189228123,
"grad_norm": 1.8204731941223145,
"learning_rate": 8.766225888632779e-08,
"loss": 0.0122,
"step": 17400
},
{
"epoch": 2.122836412709725,
"grad_norm": 0.3997783660888672,
"learning_rate": 8.752746438055184e-08,
"loss": 0.0114,
"step": 17500
},
{
"epoch": 2.1349669064966377,
"grad_norm": 0.304776668548584,
"learning_rate": 8.739266987477591e-08,
"loss": 0.0106,
"step": 17600
},
{
"epoch": 2.1470974002835503,
"grad_norm": 0.31530994176864624,
"learning_rate": 8.725787536899996e-08,
"loss": 0.0098,
"step": 17700
},
{
"epoch": 2.159227894070463,
"grad_norm": 0.3956185579299927,
"learning_rate": 8.712308086322401e-08,
"loss": 0.0091,
"step": 17800
},
{
"epoch": 2.1713583878573757,
"grad_norm": 1.1947382688522339,
"learning_rate": 8.698828635744807e-08,
"loss": 0.0084,
"step": 17900
},
{
"epoch": 2.1834888816442883,
"grad_norm": 0.33143043518066406,
"learning_rate": 8.685349185167212e-08,
"loss": 0.0078,
"step": 18000
},
{
"epoch": 2.195619375431201,
"grad_norm": 0.46097975969314575,
"learning_rate": 8.671869734589617e-08,
"loss": 0.0073,
"step": 18100
},
{
"epoch": 2.2077498692181137,
"grad_norm": 0.28613144159317017,
"learning_rate": 8.658390284012024e-08,
"loss": 0.0068,
"step": 18200
},
{
"epoch": 2.219880363005027,
"grad_norm": 0.339844286441803,
"learning_rate": 8.644910833434429e-08,
"loss": 0.0063,
"step": 18300
},
{
"epoch": 2.2320108567919394,
"grad_norm": 0.7511897683143616,
"learning_rate": 8.631431382856834e-08,
"loss": 0.0059,
"step": 18400
},
{
"epoch": 2.244141350578852,
"grad_norm": 0.6955689787864685,
"learning_rate": 8.617951932279241e-08,
"loss": 0.0055,
"step": 18500
},
{
"epoch": 2.256271844365765,
"grad_norm": 0.40644150972366333,
"learning_rate": 8.604472481701646e-08,
"loss": 0.0051,
"step": 18600
},
{
"epoch": 2.2684023381526774,
"grad_norm": 0.23919856548309326,
"learning_rate": 8.590993031124051e-08,
"loss": 0.0048,
"step": 18700
},
{
"epoch": 2.28053283193959,
"grad_norm": 0.40758979320526123,
"learning_rate": 8.577513580546457e-08,
"loss": 0.0045,
"step": 18800
},
{
"epoch": 2.2926633257265028,
"grad_norm": 0.3676837384700775,
"learning_rate": 8.564034129968862e-08,
"loss": 0.0043,
"step": 18900
},
{
"epoch": 2.3047938195134154,
"grad_norm": 0.27469906210899353,
"learning_rate": 8.550554679391267e-08,
"loss": 0.004,
"step": 19000
},
{
"epoch": 2.316924313300328,
"grad_norm": 0.4906423091888428,
"learning_rate": 8.537075228813672e-08,
"loss": 0.0037,
"step": 19100
},
{
"epoch": 2.3290548070872408,
"grad_norm": 0.3602069616317749,
"learning_rate": 8.523595778236079e-08,
"loss": 0.0035,
"step": 19200
},
{
"epoch": 2.341185300874154,
"grad_norm": 0.38956815004348755,
"learning_rate": 8.510116327658484e-08,
"loss": 0.0033,
"step": 19300
},
{
"epoch": 2.3533157946610666,
"grad_norm": 0.34021806716918945,
"learning_rate": 8.49663687708089e-08,
"loss": 0.0031,
"step": 19400
},
{
"epoch": 2.365446288447979,
"grad_norm": 0.42637619376182556,
"learning_rate": 8.483157426503296e-08,
"loss": 0.0029,
"step": 19500
},
{
"epoch": 2.377576782234892,
"grad_norm": 0.2764069437980652,
"learning_rate": 8.469677975925701e-08,
"loss": 0.0027,
"step": 19600
},
{
"epoch": 2.3897072760218045,
"grad_norm": 0.14541544020175934,
"learning_rate": 8.456198525348107e-08,
"loss": 0.0026,
"step": 19700
},
{
"epoch": 2.401837769808717,
"grad_norm": 0.2202480435371399,
"learning_rate": 8.442719074770512e-08,
"loss": 0.0024,
"step": 19800
},
{
"epoch": 2.41396826359563,
"grad_norm": 0.09995169192552567,
"learning_rate": 8.429239624192917e-08,
"loss": 0.0023,
"step": 19900
},
{
"epoch": 2.4260987573825425,
"grad_norm": 0.20967301726341248,
"learning_rate": 8.415760173615322e-08,
"loss": 0.0022,
"step": 20000
},
{
"epoch": 2.4260987573825425,
"eval_loss": 0.0021133332047611475,
"eval_runtime": 12677.9868,
"eval_samples_per_second": 33.292,
"eval_steps_per_second": 4.162,
"step": 20000
},
{
"epoch": 2.4382292511694557,
"grad_norm": 0.26037752628326416,
"learning_rate": 8.402280723037729e-08,
"loss": 0.0021,
"step": 20100
},
{
"epoch": 2.4503597449563683,
"grad_norm": 0.2062160074710846,
"learning_rate": 8.388801272460134e-08,
"loss": 0.0019,
"step": 20200
},
{
"epoch": 2.462490238743281,
"grad_norm": 0.14383727312088013,
"learning_rate": 8.37532182188254e-08,
"loss": 0.0018,
"step": 20300
},
{
"epoch": 2.4746207325301937,
"grad_norm": 0.15886737406253815,
"learning_rate": 8.361842371304946e-08,
"loss": 0.0017,
"step": 20400
},
{
"epoch": 2.4867512263171063,
"grad_norm": 0.11849108338356018,
"learning_rate": 8.348362920727351e-08,
"loss": 0.0016,
"step": 20500
},
{
"epoch": 2.498881720104019,
"grad_norm": 0.14686524868011475,
"learning_rate": 8.334883470149757e-08,
"loss": 0.0015,
"step": 20600
},
{
"epoch": 2.5110122138909317,
"grad_norm": 0.20525987446308136,
"learning_rate": 8.321404019572162e-08,
"loss": 0.0015,
"step": 20700
},
{
"epoch": 2.5231427076778443,
"grad_norm": 0.23715050518512726,
"learning_rate": 8.307924568994567e-08,
"loss": 0.0014,
"step": 20800
},
{
"epoch": 2.535273201464757,
"grad_norm": 0.16575060784816742,
"learning_rate": 8.294445118416972e-08,
"loss": 0.0013,
"step": 20900
},
{
"epoch": 2.5474036952516697,
"grad_norm": 0.21624301373958588,
"learning_rate": 8.280965667839378e-08,
"loss": 0.0012,
"step": 21000
},
{
"epoch": 2.5595341890385823,
"grad_norm": 0.2248183786869049,
"learning_rate": 8.267486217261784e-08,
"loss": 0.0012,
"step": 21100
},
{
"epoch": 2.5716646828254954,
"grad_norm": 0.09357228130102158,
"learning_rate": 8.25400676668419e-08,
"loss": 0.0011,
"step": 21200
},
{
"epoch": 2.583795176612408,
"grad_norm": 0.14724239706993103,
"learning_rate": 8.240527316106595e-08,
"loss": 0.001,
"step": 21300
},
{
"epoch": 2.5959256703993208,
"grad_norm": 0.09311047941446304,
"learning_rate": 8.227047865529001e-08,
"loss": 0.001,
"step": 21400
},
{
"epoch": 2.6080561641862334,
"grad_norm": 0.13593658804893494,
"learning_rate": 8.213568414951407e-08,
"loss": 0.0009,
"step": 21500
},
{
"epoch": 2.620186657973146,
"grad_norm": 0.12716355919837952,
"learning_rate": 8.200088964373812e-08,
"loss": 0.0009,
"step": 21600
},
{
"epoch": 2.6323171517600588,
"grad_norm": 0.09387937188148499,
"learning_rate": 8.186609513796217e-08,
"loss": 0.0008,
"step": 21700
},
{
"epoch": 2.6444476455469714,
"grad_norm": 0.09374915808439255,
"learning_rate": 8.173130063218623e-08,
"loss": 0.0008,
"step": 21800
},
{
"epoch": 2.6565781393338845,
"grad_norm": 0.09871383756399155,
"learning_rate": 8.159650612641028e-08,
"loss": 0.0008,
"step": 21900
},
{
"epoch": 2.668708633120797,
"grad_norm": 0.20628078281879425,
"learning_rate": 8.146171162063434e-08,
"loss": 0.0007,
"step": 22000
},
{
"epoch": 2.68083912690771,
"grad_norm": 0.06819931417703629,
"learning_rate": 8.13269171148584e-08,
"loss": 0.0007,
"step": 22100
},
{
"epoch": 2.6929696206946225,
"grad_norm": 0.09931056201457977,
"learning_rate": 8.119212260908245e-08,
"loss": 0.0006,
"step": 22200
},
{
"epoch": 2.705100114481535,
"grad_norm": 0.11566577851772308,
"learning_rate": 8.105732810330652e-08,
"loss": 0.0006,
"step": 22300
},
{
"epoch": 2.717230608268448,
"grad_norm": 0.06021908298134804,
"learning_rate": 8.092253359753057e-08,
"loss": 0.0006,
"step": 22400
},
{
"epoch": 2.7293611020553605,
"grad_norm": 0.07083894312381744,
"learning_rate": 8.078773909175462e-08,
"loss": 0.0005,
"step": 22500
},
{
"epoch": 2.741491595842273,
"grad_norm": 0.036980826407670975,
"learning_rate": 8.065294458597867e-08,
"loss": 0.0005,
"step": 22600
},
{
"epoch": 2.753622089629186,
"grad_norm": 0.15954025089740753,
"learning_rate": 8.051815008020273e-08,
"loss": 0.0005,
"step": 22700
},
{
"epoch": 2.7657525834160985,
"grad_norm": 0.08207129687070847,
"learning_rate": 8.038335557442678e-08,
"loss": 0.0005,
"step": 22800
},
{
"epoch": 2.777883077203011,
"grad_norm": 0.05309203267097473,
"learning_rate": 8.024856106865083e-08,
"loss": 0.0004,
"step": 22900
},
{
"epoch": 2.790013570989924,
"grad_norm": 0.06624484062194824,
"learning_rate": 8.01137665628749e-08,
"loss": 0.0004,
"step": 23000
},
{
"epoch": 2.802144064776837,
"grad_norm": 0.049933817237615585,
"learning_rate": 7.997897205709895e-08,
"loss": 0.0004,
"step": 23100
},
{
"epoch": 2.8142745585637496,
"grad_norm": 0.04547886550426483,
"learning_rate": 7.9844177551323e-08,
"loss": 0.0004,
"step": 23200
},
{
"epoch": 2.8264050523506623,
"grad_norm": 0.049001339823007584,
"learning_rate": 7.970938304554707e-08,
"loss": 0.0004,
"step": 23300
},
{
"epoch": 2.838535546137575,
"grad_norm": 0.04988383874297142,
"learning_rate": 7.957458853977112e-08,
"loss": 0.0003,
"step": 23400
},
{
"epoch": 2.8506660399244876,
"grad_norm": 0.02339191362261772,
"learning_rate": 7.943979403399516e-08,
"loss": 0.0003,
"step": 23500
},
{
"epoch": 2.8627965337114003,
"grad_norm": 0.06541607528924942,
"learning_rate": 7.930499952821923e-08,
"loss": 0.0003,
"step": 23600
},
{
"epoch": 2.874927027498313,
"grad_norm": 0.04095012694597244,
"learning_rate": 7.917020502244328e-08,
"loss": 0.0003,
"step": 23700
},
{
"epoch": 2.887057521285226,
"grad_norm": 0.031226731836795807,
"learning_rate": 7.903541051666733e-08,
"loss": 0.0003,
"step": 23800
},
{
"epoch": 2.8991880150721387,
"grad_norm": 0.0459061898291111,
"learning_rate": 7.89006160108914e-08,
"loss": 0.0003,
"step": 23900
},
{
"epoch": 2.9113185088590514,
"grad_norm": 0.016833819448947906,
"learning_rate": 7.876582150511545e-08,
"loss": 0.0003,
"step": 24000
},
{
"epoch": 2.923449002645964,
"grad_norm": 0.020327765494585037,
"learning_rate": 7.86310269993395e-08,
"loss": 0.0002,
"step": 24100
},
{
"epoch": 2.9355794964328767,
"grad_norm": 0.020058810710906982,
"learning_rate": 7.849623249356357e-08,
"loss": 0.0002,
"step": 24200
},
{
"epoch": 2.9477099902197894,
"grad_norm": 0.011464670300483704,
"learning_rate": 7.836143798778762e-08,
"loss": 0.0002,
"step": 24300
},
{
"epoch": 2.959840484006702,
"grad_norm": 0.008502807468175888,
"learning_rate": 7.822664348201166e-08,
"loss": 0.0002,
"step": 24400
},
{
"epoch": 2.9719709777936147,
"grad_norm": 0.007184523623436689,
"learning_rate": 7.809184897623573e-08,
"loss": 0.0002,
"step": 24500
},
{
"epoch": 2.9841014715805274,
"grad_norm": 0.008515238761901855,
"learning_rate": 7.795705447045978e-08,
"loss": 0.0002,
"step": 24600
},
{
"epoch": 2.99623196536744,
"grad_norm": 0.006969008129090071,
"learning_rate": 7.782225996468383e-08,
"loss": 0.0002,
"step": 24700
},
{
"epoch": 3.0083624591543527,
"grad_norm": 0.017829405143857002,
"learning_rate": 7.768746545890788e-08,
"loss": 0.0002,
"step": 24800
},
{
"epoch": 3.020492952941266,
"grad_norm": 0.006673099938780069,
"learning_rate": 7.755267095313195e-08,
"loss": 0.0002,
"step": 24900
},
{
"epoch": 3.0326234467281785,
"grad_norm": 0.04286098852753639,
"learning_rate": 7.7417876447356e-08,
"loss": 0.0001,
"step": 25000
},
{
"epoch": 3.044753940515091,
"grad_norm": 0.012195469811558723,
"learning_rate": 7.728308194158006e-08,
"loss": 0.0001,
"step": 25100
},
{
"epoch": 3.056884434302004,
"grad_norm": 0.02029520832002163,
"learning_rate": 7.714828743580412e-08,
"loss": 0.0001,
"step": 25200
},
{
"epoch": 3.0690149280889165,
"grad_norm": 0.02022946998476982,
"learning_rate": 7.701349293002816e-08,
"loss": 0.0001,
"step": 25300
},
{
"epoch": 3.081145421875829,
"grad_norm": 0.01885395683348179,
"learning_rate": 7.687869842425221e-08,
"loss": 0.0001,
"step": 25400
},
{
"epoch": 3.093275915662742,
"grad_norm": 0.020423822104930878,
"learning_rate": 7.674390391847628e-08,
"loss": 0.0001,
"step": 25500
},
{
"epoch": 3.1054064094496545,
"grad_norm": 0.012373251840472221,
"learning_rate": 7.660910941270033e-08,
"loss": 0.0001,
"step": 25600
},
{
"epoch": 3.117536903236567,
"grad_norm": 0.010483508929610252,
"learning_rate": 7.647431490692439e-08,
"loss": 0.0001,
"step": 25700
},
{
"epoch": 3.1296673970234803,
"grad_norm": 0.012648390606045723,
"learning_rate": 7.633952040114845e-08,
"loss": 0.0001,
"step": 25800
},
{
"epoch": 3.141797890810393,
"grad_norm": 0.003789283335208893,
"learning_rate": 7.62047258953725e-08,
"loss": 0.0001,
"step": 25900
},
{
"epoch": 3.1539283845973056,
"grad_norm": 0.003961450420320034,
"learning_rate": 7.606993138959656e-08,
"loss": 0.0001,
"step": 26000
},
{
"epoch": 3.1660588783842183,
"grad_norm": 0.005319498013705015,
"learning_rate": 7.593513688382062e-08,
"loss": 0.0001,
"step": 26100
},
{
"epoch": 3.178189372171131,
"grad_norm": 0.0033047376200556755,
"learning_rate": 7.580034237804467e-08,
"loss": 0.0001,
"step": 26200
},
{
"epoch": 3.1903198659580436,
"grad_norm": 0.019682567566633224,
"learning_rate": 7.566554787226871e-08,
"loss": 0.0001,
"step": 26300
},
{
"epoch": 3.2024503597449563,
"grad_norm": 0.015115097165107727,
"learning_rate": 7.553075336649278e-08,
"loss": 0.0001,
"step": 26400
},
{
"epoch": 3.214580853531869,
"grad_norm": 0.004491306375712156,
"learning_rate": 7.539595886071683e-08,
"loss": 0.0001,
"step": 26500
},
{
"epoch": 3.2267113473187816,
"grad_norm": 0.0067758746445178986,
"learning_rate": 7.526116435494089e-08,
"loss": 0.0001,
"step": 26600
},
{
"epoch": 3.2388418411056943,
"grad_norm": 0.009860140271484852,
"learning_rate": 7.512636984916495e-08,
"loss": 0.0001,
"step": 26700
},
{
"epoch": 3.2509723348926074,
"grad_norm": 0.00978156179189682,
"learning_rate": 7.4991575343389e-08,
"loss": 0.0001,
"step": 26800
},
{
"epoch": 3.26310282867952,
"grad_norm": 0.012902422808110714,
"learning_rate": 7.485678083761306e-08,
"loss": 0.0001,
"step": 26900
},
{
"epoch": 3.2752333224664327,
"grad_norm": 0.004666306544095278,
"learning_rate": 7.472198633183711e-08,
"loss": 0.0,
"step": 27000
},
{
"epoch": 3.2873638162533454,
"grad_norm": 0.00277140736579895,
"learning_rate": 7.458719182606118e-08,
"loss": 0.0,
"step": 27100
},
{
"epoch": 3.299494310040258,
"grad_norm": 0.0019914316944777966,
"learning_rate": 7.445239732028521e-08,
"loss": 0.0,
"step": 27200
},
{
"epoch": 3.3116248038271707,
"grad_norm": 0.0016523301601409912,
"learning_rate": 7.431760281450927e-08,
"loss": 0.0,
"step": 27300
},
{
"epoch": 3.3237552976140834,
"grad_norm": 0.003063188400119543,
"learning_rate": 7.418280830873333e-08,
"loss": 0.0,
"step": 27400
},
{
"epoch": 3.335885791400996,
"grad_norm": 0.0045642610639333725,
"learning_rate": 7.404801380295739e-08,
"loss": 0.0,
"step": 27500
},
{
"epoch": 3.348016285187909,
"grad_norm": 0.005651027895510197,
"learning_rate": 7.391321929718144e-08,
"loss": 0.0,
"step": 27600
},
{
"epoch": 3.360146778974822,
"grad_norm": 0.005226987414062023,
"learning_rate": 7.37784247914055e-08,
"loss": 0.0,
"step": 27700
},
{
"epoch": 3.3722772727617345,
"grad_norm": 0.0039469217881560326,
"learning_rate": 7.364363028562956e-08,
"loss": 0.0,
"step": 27800
},
{
"epoch": 3.384407766548647,
"grad_norm": 0.0011639875592663884,
"learning_rate": 7.350883577985361e-08,
"loss": 0.0,
"step": 27900
},
{
"epoch": 3.39653826033556,
"grad_norm": 0.002607525559142232,
"learning_rate": 7.337404127407768e-08,
"loss": 0.0,
"step": 28000
},
{
"epoch": 3.4086687541224725,
"grad_norm": 0.0024226950481534004,
"learning_rate": 7.323924676830172e-08,
"loss": 0.0,
"step": 28100
},
{
"epoch": 3.420799247909385,
"grad_norm": 0.008228462189435959,
"learning_rate": 7.310445226252577e-08,
"loss": 0.0,
"step": 28200
},
{
"epoch": 3.432929741696298,
"grad_norm": 0.0010171543108299375,
"learning_rate": 7.296965775674983e-08,
"loss": 0.0,
"step": 28300
},
{
"epoch": 3.4450602354832105,
"grad_norm": 0.0010841701878234744,
"learning_rate": 7.283486325097389e-08,
"loss": 0.0,
"step": 28400
},
{
"epoch": 3.457190729270123,
"grad_norm": 0.0019339750288054347,
"learning_rate": 7.270006874519794e-08,
"loss": 0.0,
"step": 28500
},
{
"epoch": 3.469321223057036,
"grad_norm": 0.0008356723701581359,
"learning_rate": 7.2565274239422e-08,
"loss": 0.0,
"step": 28600
},
{
"epoch": 3.481451716843949,
"grad_norm": 0.0033976007252931595,
"learning_rate": 7.243047973364606e-08,
"loss": 0.0,
"step": 28700
},
{
"epoch": 3.4935822106308616,
"grad_norm": 0.0017918187659233809,
"learning_rate": 7.229568522787011e-08,
"loss": 0.0,
"step": 28800
},
{
"epoch": 3.5057127044177743,
"grad_norm": 0.000810507161077112,
"learning_rate": 7.216089072209416e-08,
"loss": 0.0,
"step": 28900
},
{
"epoch": 3.517843198204687,
"grad_norm": 0.0006327140727080405,
"learning_rate": 7.202609621631822e-08,
"loss": 0.0,
"step": 29000
},
{
"epoch": 3.5299736919915996,
"grad_norm": 0.0008019423694349825,
"learning_rate": 7.189130171054227e-08,
"loss": 0.0,
"step": 29100
},
{
"epoch": 3.5421041857785123,
"grad_norm": 0.001055610366165638,
"learning_rate": 7.175650720476632e-08,
"loss": 0.0,
"step": 29200
},
{
"epoch": 3.554234679565425,
"grad_norm": 0.0005360008217394352,
"learning_rate": 7.162171269899039e-08,
"loss": 0.0,
"step": 29300
},
{
"epoch": 3.566365173352338,
"grad_norm": 0.0012855017557740211,
"learning_rate": 7.148691819321444e-08,
"loss": 0.0,
"step": 29400
},
{
"epoch": 3.5784956671392507,
"grad_norm": 0.0014753304421901703,
"learning_rate": 7.135212368743849e-08,
"loss": 0.0,
"step": 29500
},
{
"epoch": 3.5906261609261634,
"grad_norm": 0.0004512036102823913,
"learning_rate": 7.121732918166256e-08,
"loss": 0.0,
"step": 29600
},
{
"epoch": 3.602756654713076,
"grad_norm": 0.0014754869043827057,
"learning_rate": 7.108253467588661e-08,
"loss": 0.0,
"step": 29700
},
{
"epoch": 3.6148871484999887,
"grad_norm": 0.0005086124874651432,
"learning_rate": 7.094774017011066e-08,
"loss": 0.0,
"step": 29800
},
{
"epoch": 3.6270176422869014,
"grad_norm": 0.0010658778483048081,
"learning_rate": 7.081294566433472e-08,
"loss": 0.0,
"step": 29900
},
{
"epoch": 3.639148136073814,
"grad_norm": 0.00038994685746729374,
"learning_rate": 7.067815115855877e-08,
"loss": 0.0,
"step": 30000
},
{
"epoch": 3.639148136073814,
"eval_loss": 8.61085754877422e-06,
"eval_runtime": 13089.3428,
"eval_samples_per_second": 32.246,
"eval_steps_per_second": 4.031,
"step": 30000
},
{
"epoch": 3.6512786298607267,
"grad_norm": 0.0005141702713444829,
"learning_rate": 7.054335665278282e-08,
"loss": 0.0,
"step": 30100
},
{
"epoch": 3.6634091236476394,
"grad_norm": 0.001066096592694521,
"learning_rate": 7.040856214700689e-08,
"loss": 0.0,
"step": 30200
},
{
"epoch": 3.675539617434552,
"grad_norm": 0.0003840310382656753,
"learning_rate": 7.027376764123094e-08,
"loss": 0.0,
"step": 30300
},
{
"epoch": 3.6876701112214647,
"grad_norm": 0.0003469325019977987,
"learning_rate": 7.013897313545499e-08,
"loss": 0.0,
"step": 30400
},
{
"epoch": 3.6998006050083774,
"grad_norm": 0.0008366837282665074,
"learning_rate": 7.000417862967906e-08,
"loss": 0.0,
"step": 30500
},
{
"epoch": 3.7119310987952905,
"grad_norm": 0.00028104009106755257,
"learning_rate": 6.986938412390311e-08,
"loss": 0.0,
"step": 30600
},
{
"epoch": 3.724061592582203,
"grad_norm": 0.0009309325832873583,
"learning_rate": 6.973458961812716e-08,
"loss": 0.0,
"step": 30700
},
{
"epoch": 3.736192086369116,
"grad_norm": 0.00024238611513283104,
"learning_rate": 6.959979511235122e-08,
"loss": 0.0,
"step": 30800
},
{
"epoch": 3.7483225801560285,
"grad_norm": 0.00021373844356276095,
"learning_rate": 6.946500060657527e-08,
"loss": 0.0,
"step": 30900
},
{
"epoch": 3.760453073942941,
"grad_norm": 0.0007159899105317891,
"learning_rate": 6.933020610079932e-08,
"loss": 0.0,
"step": 31000
},
{
"epoch": 3.772583567729854,
"grad_norm": 0.0006215888424776495,
"learning_rate": 6.919541159502337e-08,
"loss": 0.0,
"step": 31100
},
{
"epoch": 3.7847140615167665,
"grad_norm": 0.0001826356747187674,
"learning_rate": 6.906061708924744e-08,
"loss": 0.0,
"step": 31200
},
{
"epoch": 3.7968445553036796,
"grad_norm": 0.0002084925799863413,
"learning_rate": 6.892582258347149e-08,
"loss": 0.0,
"step": 31300
},
{
"epoch": 3.8089750490905923,
"grad_norm": 0.00020457223581615835,
"learning_rate": 6.879102807769555e-08,
"loss": 0.0,
"step": 31400
},
{
"epoch": 3.821105542877505,
"grad_norm": 0.00044186966260895133,
"learning_rate": 6.865623357191961e-08,
"loss": 0.0,
"step": 31500
},
{
"epoch": 3.8332360366644176,
"grad_norm": 0.00029807299142703414,
"learning_rate": 6.852143906614366e-08,
"loss": 0.0,
"step": 31600
},
{
"epoch": 3.8453665304513303,
"grad_norm": 0.0004684592713601887,
"learning_rate": 6.838664456036772e-08,
"loss": 0.0,
"step": 31700
},
{
"epoch": 3.857497024238243,
"grad_norm": 0.00024593668058514595,
"learning_rate": 6.825185005459177e-08,
"loss": 0.0,
"step": 31800
},
{
"epoch": 3.8696275180251556,
"grad_norm": 0.0001907894911710173,
"learning_rate": 6.811705554881582e-08,
"loss": 0.0,
"step": 31900
},
{
"epoch": 3.8817580118120683,
"grad_norm": 0.0001471816358389333,
"learning_rate": 6.798226104303987e-08,
"loss": 0.0,
"step": 32000
},
{
"epoch": 3.893888505598981,
"grad_norm": 0.0004228481266181916,
"learning_rate": 6.784746653726394e-08,
"loss": 0.0,
"step": 32100
},
{
"epoch": 3.9060189993858936,
"grad_norm": 0.00015706397243775427,
"learning_rate": 6.7712672031488e-08,
"loss": 0.0,
"step": 32200
},
{
"epoch": 3.9181494931728063,
"grad_norm": 0.00015203200746327639,
"learning_rate": 6.757787752571205e-08,
"loss": 0.0,
"step": 32300
},
{
"epoch": 3.930279986959719,
"grad_norm": 0.00012529987725429237,
"learning_rate": 6.744308301993611e-08,
"loss": 0.0,
"step": 32400
},
{
"epoch": 3.942410480746632,
"grad_norm": 0.00023667830100748688,
"learning_rate": 6.730828851416016e-08,
"loss": 0.0,
"step": 32500
},
{
"epoch": 3.9545409745335447,
"grad_norm": 0.0003280766832176596,
"learning_rate": 6.717349400838422e-08,
"loss": 0.0,
"step": 32600
},
{
"epoch": 3.9666714683204574,
"grad_norm": 0.0003750512842088938,
"learning_rate": 6.703869950260827e-08,
"loss": 0.0,
"step": 32700
},
{
"epoch": 3.97880196210737,
"grad_norm": 0.00031201005913317204,
"learning_rate": 6.690390499683232e-08,
"loss": 0.0,
"step": 32800
},
{
"epoch": 3.9909324558942827,
"grad_norm": 0.00028819395811297,
"learning_rate": 6.676911049105638e-08,
"loss": 0.0,
"step": 32900
},
{
"epoch": 4.003062949681196,
"grad_norm": 0.00012892342056147754,
"learning_rate": 6.663431598528043e-08,
"loss": 0.0,
"step": 33000
},
{
"epoch": 4.0151934434681085,
"grad_norm": 7.572331378469244e-05,
"learning_rate": 6.64995214795045e-08,
"loss": 0.0,
"step": 33100
},
{
"epoch": 4.027323937255021,
"grad_norm": 7.666134479222819e-05,
"learning_rate": 6.636472697372855e-08,
"loss": 0.0,
"step": 33200
},
{
"epoch": 4.039454431041934,
"grad_norm": 7.41102485335432e-05,
"learning_rate": 6.62299324679526e-08,
"loss": 0.0,
"step": 33300
},
{
"epoch": 4.0515849248288465,
"grad_norm": 5.6044456869130954e-05,
"learning_rate": 6.609513796217667e-08,
"loss": 0.0,
"step": 33400
},
{
"epoch": 4.063715418615759,
"grad_norm": 7.278579141711816e-05,
"learning_rate": 6.596034345640072e-08,
"loss": 0.0,
"step": 33500
},
{
"epoch": 4.075845912402672,
"grad_norm": 0.00030291761504486203,
"learning_rate": 6.582554895062477e-08,
"loss": 0.0,
"step": 33600
},
{
"epoch": 4.0879764061895845,
"grad_norm": 0.00017951276095118374,
"learning_rate": 6.569075444484882e-08,
"loss": 0.0,
"step": 33700
},
{
"epoch": 4.100106899976497,
"grad_norm": 5.9736263210652396e-05,
"learning_rate": 6.555595993907288e-08,
"loss": 0.0,
"step": 33800
},
{
"epoch": 4.11223739376341,
"grad_norm": 0.00021180949988774955,
"learning_rate": 6.542116543329693e-08,
"loss": 0.0,
"step": 33900
},
{
"epoch": 4.1243678875503225,
"grad_norm": 0.00012666590919252485,
"learning_rate": 6.5286370927521e-08,
"loss": 0.0,
"step": 34000
},
{
"epoch": 4.136498381337235,
"grad_norm": 4.756751877721399e-05,
"learning_rate": 6.515157642174505e-08,
"loss": 0.0,
"step": 34100
},
{
"epoch": 4.148628875124148,
"grad_norm": 3.8059039070503786e-05,
"learning_rate": 6.50167819159691e-08,
"loss": 0.0,
"step": 34200
},
{
"epoch": 4.1607593689110605,
"grad_norm": 6.0839298384962603e-05,
"learning_rate": 6.488198741019317e-08,
"loss": 0.0,
"step": 34300
},
{
"epoch": 4.172889862697973,
"grad_norm": 0.0002114167291438207,
"learning_rate": 6.474719290441722e-08,
"loss": 0.0,
"step": 34400
},
{
"epoch": 4.185020356484886,
"grad_norm": 0.00015508649812545627,
"learning_rate": 6.461239839864127e-08,
"loss": 0.0,
"step": 34500
},
{
"epoch": 4.1971508502717985,
"grad_norm": 7.546142296632752e-05,
"learning_rate": 6.447760389286532e-08,
"loss": 0.0,
"step": 34600
},
{
"epoch": 4.209281344058712,
"grad_norm": 3.271881359978579e-05,
"learning_rate": 6.434280938708938e-08,
"loss": 0.0,
"step": 34700
},
{
"epoch": 4.221411837845625,
"grad_norm": 0.0001592914341017604,
"learning_rate": 6.420801488131343e-08,
"loss": 0.0,
"step": 34800
},
{
"epoch": 4.233542331632537,
"grad_norm": 5.735379454563372e-05,
"learning_rate": 6.40732203755375e-08,
"loss": 0.0,
"step": 34900
},
{
"epoch": 4.24567282541945,
"grad_norm": 0.00011750426347134635,
"learning_rate": 6.393842586976155e-08,
"loss": 0.0,
"step": 35000
},
{
"epoch": 4.257803319206363,
"grad_norm": 8.914129284676164e-05,
"learning_rate": 6.38036313639856e-08,
"loss": 0.0,
"step": 35100
},
{
"epoch": 4.269933812993275,
"grad_norm": 2.7345347916707397e-05,
"learning_rate": 6.366883685820965e-08,
"loss": 0.0,
"step": 35200
},
{
"epoch": 4.282064306780188,
"grad_norm": 4.532103048404679e-05,
"learning_rate": 6.353404235243372e-08,
"loss": 0.0,
"step": 35300
},
{
"epoch": 4.294194800567101,
"grad_norm": 5.636207788484171e-05,
"learning_rate": 6.339924784665777e-08,
"loss": 0.0,
"step": 35400
},
{
"epoch": 4.306325294354013,
"grad_norm": 0.00013345239858608693,
"learning_rate": 6.326445334088182e-08,
"loss": 0.0,
"step": 35500
},
{
"epoch": 4.318455788140926,
"grad_norm": 8.585578325437382e-05,
"learning_rate": 6.312965883510588e-08,
"loss": 0.0,
"step": 35600
},
{
"epoch": 4.330586281927839,
"grad_norm": 2.5150986402877606e-05,
"learning_rate": 6.299486432932993e-08,
"loss": 0.0,
"step": 35700
},
{
"epoch": 4.342716775714751,
"grad_norm": 3.5774806747213006e-05,
"learning_rate": 6.286006982355398e-08,
"loss": 0.0,
"step": 35800
},
{
"epoch": 4.354847269501664,
"grad_norm": 4.836301377508789e-05,
"learning_rate": 6.272527531777805e-08,
"loss": 0.0,
"step": 35900
},
{
"epoch": 4.366977763288577,
"grad_norm": 2.2555306713911705e-05,
"learning_rate": 6.25904808120021e-08,
"loss": 0.0,
"step": 36000
},
{
"epoch": 4.379108257075489,
"grad_norm": 3.531112088239752e-05,
"learning_rate": 6.245568630622615e-08,
"loss": 0.0,
"step": 36100
},
{
"epoch": 4.391238750862402,
"grad_norm": 7.099560025380924e-05,
"learning_rate": 6.232089180045022e-08,
"loss": 0.0,
"step": 36200
},
{
"epoch": 4.403369244649315,
"grad_norm": 0.00011959305265918374,
"learning_rate": 6.218609729467427e-08,
"loss": 0.0,
"step": 36300
},
{
"epoch": 4.415499738436227,
"grad_norm": 4.892437209491618e-05,
"learning_rate": 6.205130278889832e-08,
"loss": 0.0,
"step": 36400
},
{
"epoch": 4.427630232223141,
"grad_norm": 1.5868727132328786e-05,
"learning_rate": 6.191650828312238e-08,
"loss": 0.0,
"step": 36500
},
{
"epoch": 4.439760726010054,
"grad_norm": 4.7836245357757434e-05,
"learning_rate": 6.178171377734643e-08,
"loss": 0.0,
"step": 36600
},
{
"epoch": 4.451891219796966,
"grad_norm": 9.044109901878983e-05,
"learning_rate": 6.164691927157048e-08,
"loss": 0.0,
"step": 36700
},
{
"epoch": 4.464021713583879,
"grad_norm": 7.068177364999428e-05,
"learning_rate": 6.151212476579455e-08,
"loss": 0.0,
"step": 36800
},
{
"epoch": 4.476152207370792,
"grad_norm": 2.011969445447903e-05,
"learning_rate": 6.13773302600186e-08,
"loss": 0.0,
"step": 36900
},
{
"epoch": 4.488282701157704,
"grad_norm": 9.63186175795272e-05,
"learning_rate": 6.124253575424265e-08,
"loss": 0.0,
"step": 37000
},
{
"epoch": 4.500413194944617,
"grad_norm": 5.197514474275522e-05,
"learning_rate": 6.11077412484667e-08,
"loss": 0.0,
"step": 37100
},
{
"epoch": 4.51254368873153,
"grad_norm": 2.6003468519775197e-05,
"learning_rate": 6.097294674269077e-08,
"loss": 0.0,
"step": 37200
},
{
"epoch": 4.524674182518442,
"grad_norm": 7.684047886868939e-05,
"learning_rate": 6.083815223691482e-08,
"loss": 0.0,
"step": 37300
},
{
"epoch": 4.536804676305355,
"grad_norm": 0.00010752572416095063,
"learning_rate": 6.070335773113888e-08,
"loss": 0.0,
"step": 37400
},
{
"epoch": 4.5489351700922676,
"grad_norm": 4.4010826968587935e-05,
"learning_rate": 6.056856322536293e-08,
"loss": 0.0,
"step": 37500
},
{
"epoch": 4.56106566387918,
"grad_norm": 0.00011428508150856942,
"learning_rate": 6.043376871958698e-08,
"loss": 0.0,
"step": 37600
},
{
"epoch": 4.573196157666093,
"grad_norm": 0.00011925880244234577,
"learning_rate": 6.029897421381104e-08,
"loss": 0.0,
"step": 37700
},
{
"epoch": 4.5853266514530056,
"grad_norm": 4.866239396505989e-05,
"learning_rate": 6.01641797080351e-08,
"loss": 0.0,
"step": 37800
},
{
"epoch": 4.597457145239918,
"grad_norm": 4.699817145592533e-05,
"learning_rate": 6.002938520225915e-08,
"loss": 0.0,
"step": 37900
},
{
"epoch": 4.609587639026831,
"grad_norm": 3.538289820426144e-05,
"learning_rate": 5.989459069648321e-08,
"loss": 0.0,
"step": 38000
},
{
"epoch": 4.6217181328137436,
"grad_norm": 0.0001686308823991567,
"learning_rate": 5.975979619070727e-08,
"loss": 0.0,
"step": 38100
},
{
"epoch": 4.633848626600656,
"grad_norm": 3.119569373666309e-05,
"learning_rate": 5.962500168493133e-08,
"loss": 0.0,
"step": 38200
},
{
"epoch": 4.645979120387569,
"grad_norm": 1.2374849575280678e-05,
"learning_rate": 5.949020717915537e-08,
"loss": 0.0,
"step": 38300
},
{
"epoch": 4.6581096141744815,
"grad_norm": 1.740476545819547e-05,
"learning_rate": 5.935541267337944e-08,
"loss": 0.0,
"step": 38400
},
{
"epoch": 4.670240107961394,
"grad_norm": 2.1879846826777793e-05,
"learning_rate": 5.9220618167603483e-08,
"loss": 0.0,
"step": 38500
},
{
"epoch": 4.682370601748308,
"grad_norm": 6.039372965460643e-05,
"learning_rate": 5.9085823661827536e-08,
"loss": 0.0,
"step": 38600
},
{
"epoch": 4.69450109553522,
"grad_norm": 3.6898843973176554e-05,
"learning_rate": 5.89510291560516e-08,
"loss": 0.0,
"step": 38700
},
{
"epoch": 4.706631589322133,
"grad_norm": 3.252164970035665e-05,
"learning_rate": 5.8816234650275655e-08,
"loss": 0.0,
"step": 38800
},
{
"epoch": 4.718762083109046,
"grad_norm": 8.90696537680924e-06,
"learning_rate": 5.868144014449971e-08,
"loss": 0.0,
"step": 38900
},
{
"epoch": 4.730892576895958,
"grad_norm": 2.7334457627148367e-05,
"learning_rate": 5.8546645638723766e-08,
"loss": 0.0,
"step": 39000
},
{
"epoch": 4.743023070682871,
"grad_norm": 8.614475518697873e-06,
"learning_rate": 5.841185113294782e-08,
"loss": 0.0,
"step": 39100
},
{
"epoch": 4.755153564469784,
"grad_norm": 1.1286195331194904e-05,
"learning_rate": 5.827705662717187e-08,
"loss": 0.0,
"step": 39200
},
{
"epoch": 4.767284058256696,
"grad_norm": 1.4409168215934187e-05,
"learning_rate": 5.8142262121395924e-08,
"loss": 0.0,
"step": 39300
},
{
"epoch": 4.779414552043609,
"grad_norm": 8.718334720470011e-05,
"learning_rate": 5.800746761561999e-08,
"loss": 0.0,
"step": 39400
},
{
"epoch": 4.791545045830522,
"grad_norm": 3.0478166081593372e-05,
"learning_rate": 5.7872673109844036e-08,
"loss": 0.0,
"step": 39500
},
{
"epoch": 4.803675539617434,
"grad_norm": 4.2255876905983314e-05,
"learning_rate": 5.773787860406809e-08,
"loss": 0.0,
"step": 39600
},
{
"epoch": 4.815806033404347,
"grad_norm": 4.204777360428125e-05,
"learning_rate": 5.7603084098292155e-08,
"loss": 0.0,
"step": 39700
},
{
"epoch": 4.82793652719126,
"grad_norm": 4.219416223349981e-05,
"learning_rate": 5.746828959251621e-08,
"loss": 0.0,
"step": 39800
},
{
"epoch": 4.840067020978172,
"grad_norm": 5.802089435746893e-05,
"learning_rate": 5.733349508674026e-08,
"loss": 0.0,
"step": 39900
},
{
"epoch": 4.852197514765085,
"grad_norm": 7.914522575447336e-05,
"learning_rate": 5.719870058096432e-08,
"loss": 0.0,
"step": 40000
},
{
"epoch": 4.852197514765085,
"eval_loss": 2.0435865621948324e-07,
"eval_runtime": 12761.113,
"eval_samples_per_second": 33.075,
"eval_steps_per_second": 4.134,
"step": 40000
},
{
"epoch": 4.864328008551998,
"grad_norm": 4.935232936986722e-05,
"learning_rate": 5.706390607518837e-08,
"loss": 0.0,
"step": 40100
},
{
"epoch": 4.876458502338911,
"grad_norm": 7.840626494726166e-05,
"learning_rate": 5.6929111569412425e-08,
"loss": 0.0,
"step": 40200
},
{
"epoch": 4.888588996125824,
"grad_norm": 1.2734864867525175e-05,
"learning_rate": 5.679431706363649e-08,
"loss": 0.0,
"step": 40300
},
{
"epoch": 4.900719489912737,
"grad_norm": 4.8442419938510284e-05,
"learning_rate": 5.665952255786054e-08,
"loss": 0.0,
"step": 40400
},
{
"epoch": 4.912849983699649,
"grad_norm": 6.906664930284023e-05,
"learning_rate": 5.652472805208459e-08,
"loss": 0.0,
"step": 40500
},
{
"epoch": 4.924980477486562,
"grad_norm": 5.0475380703574046e-05,
"learning_rate": 5.6389933546308655e-08,
"loss": 0.0,
"step": 40600
},
{
"epoch": 4.937110971273475,
"grad_norm": 1.4410921721719205e-05,
"learning_rate": 5.625513904053271e-08,
"loss": 0.0,
"step": 40700
},
{
"epoch": 4.949241465060387,
"grad_norm": 4.081324368598871e-05,
"learning_rate": 5.612034453475676e-08,
"loss": 0.0,
"step": 40800
},
{
"epoch": 4.9613719588473,
"grad_norm": 7.731416189926676e-06,
"learning_rate": 5.598555002898082e-08,
"loss": 0.0,
"step": 40900
},
{
"epoch": 4.973502452634213,
"grad_norm": 1.6508680346305482e-05,
"learning_rate": 5.585075552320487e-08,
"loss": 0.0,
"step": 41000
},
{
"epoch": 4.985632946421125,
"grad_norm": 3.791180643020198e-05,
"learning_rate": 5.5715961017428925e-08,
"loss": 0.0,
"step": 41100
},
{
"epoch": 4.997763440208038,
"grad_norm": 2.726632374105975e-05,
"learning_rate": 5.558116651165298e-08,
"loss": 0.0,
"step": 41200
},
{
"epoch": 5.009893933994951,
"grad_norm": 0.0001511267473688349,
"learning_rate": 5.544637200587704e-08,
"loss": 0.0,
"step": 41300
},
{
"epoch": 5.022024427781863,
"grad_norm": 1.2235775102453772e-05,
"learning_rate": 5.531157750010109e-08,
"loss": 0.0,
"step": 41400
},
{
"epoch": 5.034154921568776,
"grad_norm": 0.00012380690895952284,
"learning_rate": 5.517678299432514e-08,
"loss": 0.0,
"step": 41500
},
{
"epoch": 5.046285415355689,
"grad_norm": 2.332998155907262e-05,
"learning_rate": 5.504198848854921e-08,
"loss": 0.0,
"step": 41600
},
{
"epoch": 5.058415909142601,
"grad_norm": 0.00012525348574854434,
"learning_rate": 5.490719398277326e-08,
"loss": 0.0,
"step": 41700
},
{
"epoch": 5.070546402929514,
"grad_norm": 2.9540859031840228e-05,
"learning_rate": 5.4772399476997314e-08,
"loss": 0.0,
"step": 41800
},
{
"epoch": 5.082676896716427,
"grad_norm": 5.814078758703545e-05,
"learning_rate": 5.463760497122137e-08,
"loss": 0.0,
"step": 41900
},
{
"epoch": 5.094807390503339,
"grad_norm": 0.00012713873002212495,
"learning_rate": 5.4502810465445426e-08,
"loss": 0.0,
"step": 42000
},
{
"epoch": 5.106937884290252,
"grad_norm": 0.0001255370443686843,
"learning_rate": 5.436801595966948e-08,
"loss": 0.0,
"step": 42100
},
{
"epoch": 5.1190683780771655,
"grad_norm": 0.00012677146878559142,
"learning_rate": 5.423322145389354e-08,
"loss": 0.0,
"step": 42200
},
{
"epoch": 5.131198871864078,
"grad_norm": 5.592630259343423e-06,
"learning_rate": 5.409842694811759e-08,
"loss": 0.0,
"step": 42300
},
{
"epoch": 5.143329365650991,
"grad_norm": 3.1847266654949635e-05,
"learning_rate": 5.396363244234164e-08,
"loss": 0.0,
"step": 42400
},
{
"epoch": 5.1554598594379035,
"grad_norm": 3.685112460516393e-05,
"learning_rate": 5.382883793656571e-08,
"loss": 0.0,
"step": 42500
},
{
"epoch": 5.167590353224816,
"grad_norm": 1.862756471382454e-05,
"learning_rate": 5.369404343078976e-08,
"loss": 0.0,
"step": 42600
},
{
"epoch": 5.179720847011729,
"grad_norm": 3.480441591818817e-05,
"learning_rate": 5.3559248925013814e-08,
"loss": 0.0,
"step": 42700
},
{
"epoch": 5.1918513407986415,
"grad_norm": 5.029854946769774e-06,
"learning_rate": 5.3424454419237874e-08,
"loss": 0.0,
"step": 42800
},
{
"epoch": 5.203981834585554,
"grad_norm": 2.7888721888302825e-05,
"learning_rate": 5.3289659913461926e-08,
"loss": 0.0,
"step": 42900
},
{
"epoch": 5.216112328372467,
"grad_norm": 1.3389450941758696e-05,
"learning_rate": 5.315486540768598e-08,
"loss": 0.0,
"step": 43000
},
{
"epoch": 5.2282428221593795,
"grad_norm": 5.3387711886898614e-06,
"learning_rate": 5.302007090191004e-08,
"loss": 0.0,
"step": 43100
},
{
"epoch": 5.240373315946292,
"grad_norm": 4.869915301242145e-06,
"learning_rate": 5.288527639613409e-08,
"loss": 0.0,
"step": 43200
},
{
"epoch": 5.252503809733205,
"grad_norm": 1.4157280020299368e-05,
"learning_rate": 5.2750481890358144e-08,
"loss": 0.0,
"step": 43300
},
{
"epoch": 5.2646343035201175,
"grad_norm": 1.0791780368890613e-05,
"learning_rate": 5.2615687384582196e-08,
"loss": 0.0,
"step": 43400
},
{
"epoch": 5.27676479730703,
"grad_norm": 3.652514351415448e-05,
"learning_rate": 5.248089287880626e-08,
"loss": 0.0,
"step": 43500
},
{
"epoch": 5.288895291093943,
"grad_norm": 4.087711567990482e-05,
"learning_rate": 5.2346098373030315e-08,
"loss": 0.0,
"step": 43600
},
{
"epoch": 5.3010257848808555,
"grad_norm": 2.1705629478674382e-05,
"learning_rate": 5.221130386725436e-08,
"loss": 0.0,
"step": 43700
},
{
"epoch": 5.313156278667768,
"grad_norm": 2.8108963306294754e-05,
"learning_rate": 5.2076509361478427e-08,
"loss": 0.0,
"step": 43800
},
{
"epoch": 5.325286772454681,
"grad_norm": 2.8635831768042408e-05,
"learning_rate": 5.194171485570248e-08,
"loss": 0.0,
"step": 43900
},
{
"epoch": 5.337417266241594,
"grad_norm": 3.3284202800132334e-05,
"learning_rate": 5.180692034992653e-08,
"loss": 0.0,
"step": 44000
},
{
"epoch": 5.349547760028507,
"grad_norm": 2.190342274843715e-05,
"learning_rate": 5.167212584415059e-08,
"loss": 0.0,
"step": 44100
},
{
"epoch": 5.36167825381542,
"grad_norm": 2.5555082174832933e-05,
"learning_rate": 5.1537331338374644e-08,
"loss": 0.0,
"step": 44200
},
{
"epoch": 5.373808747602332,
"grad_norm": 3.973677667090669e-05,
"learning_rate": 5.1402536832598697e-08,
"loss": 0.0,
"step": 44300
},
{
"epoch": 5.385939241389245,
"grad_norm": 3.239759826101363e-05,
"learning_rate": 5.126774232682276e-08,
"loss": 0.0,
"step": 44400
},
{
"epoch": 5.398069735176158,
"grad_norm": 5.3550720622297376e-05,
"learning_rate": 5.1132947821046815e-08,
"loss": 0.0,
"step": 44500
},
{
"epoch": 5.41020022896307,
"grad_norm": 3.1185478292172775e-05,
"learning_rate": 5.099815331527086e-08,
"loss": 0.0,
"step": 44600
},
{
"epoch": 5.422330722749983,
"grad_norm": 5.0614133215276524e-05,
"learning_rate": 5.086335880949493e-08,
"loss": 0.0,
"step": 44700
},
{
"epoch": 5.434461216536896,
"grad_norm": 2.4577335352660157e-05,
"learning_rate": 5.072856430371898e-08,
"loss": 0.0,
"step": 44800
},
{
"epoch": 5.446591710323808,
"grad_norm": 2.079096157103777e-05,
"learning_rate": 5.059376979794303e-08,
"loss": 0.0,
"step": 44900
},
{
"epoch": 5.458722204110721,
"grad_norm": 1.3388003935688175e-05,
"learning_rate": 5.045897529216709e-08,
"loss": 0.0,
"step": 45000
},
{
"epoch": 5.470852697897634,
"grad_norm": 2.5073253709706478e-05,
"learning_rate": 5.0324180786391144e-08,
"loss": 0.0,
"step": 45100
},
{
"epoch": 5.482983191684546,
"grad_norm": 1.678628359513823e-05,
"learning_rate": 5.01893862806152e-08,
"loss": 0.0,
"step": 45200
},
{
"epoch": 5.495113685471459,
"grad_norm": 1.079649791790871e-05,
"learning_rate": 5.005459177483925e-08,
"loss": 0.0,
"step": 45300
},
{
"epoch": 5.507244179258372,
"grad_norm": 1.4951794582884759e-05,
"learning_rate": 4.9919797269063316e-08,
"loss": 0.0,
"step": 45400
},
{
"epoch": 5.519374673045284,
"grad_norm": 5.0269860366825014e-05,
"learning_rate": 4.978500276328736e-08,
"loss": 0.0,
"step": 45500
},
{
"epoch": 5.531505166832197,
"grad_norm": 2.7576521461014636e-05,
"learning_rate": 4.965020825751142e-08,
"loss": 0.0,
"step": 45600
},
{
"epoch": 5.54363566061911,
"grad_norm": 4.802513285540044e-05,
"learning_rate": 4.951541375173548e-08,
"loss": 0.0,
"step": 45700
},
{
"epoch": 5.555766154406022,
"grad_norm": 9.436444997845683e-06,
"learning_rate": 4.938061924595953e-08,
"loss": 0.0,
"step": 45800
},
{
"epoch": 5.567896648192935,
"grad_norm": 1.7118674804805778e-05,
"learning_rate": 4.924582474018359e-08,
"loss": 0.0,
"step": 45900
},
{
"epoch": 5.580027141979848,
"grad_norm": 1.7416510672774166e-05,
"learning_rate": 4.911103023440764e-08,
"loss": 0.0,
"step": 46000
},
{
"epoch": 5.592157635766761,
"grad_norm": 3.0051314752199687e-05,
"learning_rate": 4.89762357286317e-08,
"loss": 0.0,
"step": 46100
},
{
"epoch": 5.604288129553674,
"grad_norm": 7.384042419289472e-06,
"learning_rate": 4.884144122285576e-08,
"loss": 0.0,
"step": 46200
},
{
"epoch": 5.616418623340587,
"grad_norm": 3.820080746663734e-05,
"learning_rate": 4.870664671707981e-08,
"loss": 0.0,
"step": 46300
},
{
"epoch": 5.628549117127499,
"grad_norm": 1.9920646082027815e-05,
"learning_rate": 4.857185221130386e-08,
"loss": 0.0,
"step": 46400
},
{
"epoch": 5.640679610914412,
"grad_norm": 3.2449988793814555e-05,
"learning_rate": 4.843705770552792e-08,
"loss": 0.0,
"step": 46500
},
{
"epoch": 5.652810104701325,
"grad_norm": 1.5992029148037545e-05,
"learning_rate": 4.8302263199751974e-08,
"loss": 0.0,
"step": 46600
},
{
"epoch": 5.664940598488237,
"grad_norm": 8.42284680402372e-06,
"learning_rate": 4.8167468693976033e-08,
"loss": 0.0,
"step": 46700
},
{
"epoch": 5.67707109227515,
"grad_norm": 3.363145879120566e-05,
"learning_rate": 4.8032674188200086e-08,
"loss": 0.0,
"step": 46800
},
{
"epoch": 5.689201586062063,
"grad_norm": 9.66928928392008e-06,
"learning_rate": 4.789787968242414e-08,
"loss": 0.0,
"step": 46900
},
{
"epoch": 5.701332079848975,
"grad_norm": 4.893206278211437e-05,
"learning_rate": 4.77630851766482e-08,
"loss": 0.0,
"step": 47000
},
{
"epoch": 5.713462573635888,
"grad_norm": 2.201042661909014e-05,
"learning_rate": 4.762829067087225e-08,
"loss": 0.0,
"step": 47100
},
{
"epoch": 5.725593067422801,
"grad_norm": 1.98000852833502e-05,
"learning_rate": 4.749349616509631e-08,
"loss": 0.0,
"step": 47200
},
{
"epoch": 5.737723561209713,
"grad_norm": 7.69750931794988e-06,
"learning_rate": 4.735870165932037e-08,
"loss": 0.0,
"step": 47300
},
{
"epoch": 5.749854054996626,
"grad_norm": 4.621636435331311e-06,
"learning_rate": 4.7223907153544415e-08,
"loss": 0.0,
"step": 47400
},
{
"epoch": 5.761984548783539,
"grad_norm": 1.4387391274794936e-05,
"learning_rate": 4.7089112647768474e-08,
"loss": 0.0,
"step": 47500
},
{
"epoch": 5.774115042570451,
"grad_norm": 1.7973265130422078e-05,
"learning_rate": 4.6954318141992534e-08,
"loss": 0.0,
"step": 47600
},
{
"epoch": 5.786245536357365,
"grad_norm": 2.8360249416437e-05,
"learning_rate": 4.6819523636216586e-08,
"loss": 0.0,
"step": 47700
},
{
"epoch": 5.7983760301442775,
"grad_norm": 2.603951725177467e-05,
"learning_rate": 4.668472913044064e-08,
"loss": 0.0,
"step": 47800
},
{
"epoch": 5.81050652393119,
"grad_norm": 1.1267226909694728e-05,
"learning_rate": 4.654993462466469e-08,
"loss": 0.0,
"step": 47900
},
{
"epoch": 5.822637017718103,
"grad_norm": 4.544790499494411e-05,
"learning_rate": 4.641514011888875e-08,
"loss": 0.0,
"step": 48000
},
{
"epoch": 5.8347675115050155,
"grad_norm": 5.073808551969705e-06,
"learning_rate": 4.628034561311281e-08,
"loss": 0.0,
"step": 48100
},
{
"epoch": 5.846898005291928,
"grad_norm": 4.531604190560756e-06,
"learning_rate": 4.614555110733686e-08,
"loss": 0.0,
"step": 48200
},
{
"epoch": 5.859028499078841,
"grad_norm": 2.5783774617593735e-05,
"learning_rate": 4.6010756601560916e-08,
"loss": 0.0,
"step": 48300
},
{
"epoch": 5.8711589928657535,
"grad_norm": 1.8032031221082434e-05,
"learning_rate": 4.5875962095784975e-08,
"loss": 0.0,
"step": 48400
},
{
"epoch": 5.883289486652666,
"grad_norm": 2.339402271900326e-05,
"learning_rate": 4.574116759000903e-08,
"loss": 0.0,
"step": 48500
},
{
"epoch": 5.895419980439579,
"grad_norm": 1.0057786312245298e-05,
"learning_rate": 4.560637308423309e-08,
"loss": 0.0,
"step": 48600
},
{
"epoch": 5.9075504742264915,
"grad_norm": 1.81854484253563e-05,
"learning_rate": 4.547157857845714e-08,
"loss": 0.0,
"step": 48700
},
{
"epoch": 5.919680968013404,
"grad_norm": 1.9178327420377173e-05,
"learning_rate": 4.533678407268119e-08,
"loss": 0.0,
"step": 48800
},
{
"epoch": 5.931811461800317,
"grad_norm": 3.179845953127369e-05,
"learning_rate": 4.520198956690525e-08,
"loss": 0.0,
"step": 48900
},
{
"epoch": 5.9439419555872295,
"grad_norm": 2.309592491656076e-05,
"learning_rate": 4.5067195061129304e-08,
"loss": 0.0,
"step": 49000
},
{
"epoch": 5.956072449374142,
"grad_norm": 1.0060489330498967e-05,
"learning_rate": 4.4932400555353363e-08,
"loss": 0.0,
"step": 49100
},
{
"epoch": 5.968202943161055,
"grad_norm": 2.0553434296743944e-05,
"learning_rate": 4.4797606049577416e-08,
"loss": 0.0,
"step": 49200
},
{
"epoch": 5.9803334369479675,
"grad_norm": 3.8277423300314695e-05,
"learning_rate": 4.466281154380147e-08,
"loss": 0.0,
"step": 49300
},
{
"epoch": 5.99246393073488,
"grad_norm": 6.001651854603551e-06,
"learning_rate": 4.452801703802553e-08,
"loss": 0.0,
"step": 49400
},
{
"epoch": 6.004594424521793,
"grad_norm": 1.8310502127860673e-05,
"learning_rate": 4.4394570477307344e-08,
"loss": 0.0,
"step": 49500
},
{
"epoch": 6.0167249183087055,
"grad_norm": 2.0332221538410522e-05,
"learning_rate": 4.4259775971531397e-08,
"loss": 0.0,
"step": 49600
},
{
"epoch": 6.028855412095619,
"grad_norm": 5.2164625230943784e-05,
"learning_rate": 4.4124981465755456e-08,
"loss": 0.0,
"step": 49700
},
{
"epoch": 6.040985905882532,
"grad_norm": 3.2575280783930793e-05,
"learning_rate": 4.399018695997951e-08,
"loss": 0.0,
"step": 49800
},
{
"epoch": 6.053116399669444,
"grad_norm": 3.4000044252024963e-05,
"learning_rate": 4.385539245420357e-08,
"loss": 0.0,
"step": 49900
},
{
"epoch": 6.065246893456357,
"grad_norm": 3.062421455979347e-05,
"learning_rate": 4.372059794842762e-08,
"loss": 0.0,
"step": 50000
},
{
"epoch": 6.065246893456357,
"eval_loss": 6.81196823393293e-08,
"eval_runtime": 12731.5813,
"eval_samples_per_second": 33.152,
"eval_steps_per_second": 4.144,
"step": 50000
},
{
"epoch": 6.07737738724327,
"grad_norm": 2.012972254306078e-05,
"learning_rate": 4.358580344265167e-08,
"loss": 0.0,
"step": 50100
},
{
"epoch": 6.089507881030182,
"grad_norm": 1.4403743080038112e-05,
"learning_rate": 4.345100893687573e-08,
"loss": 0.0,
"step": 50200
},
{
"epoch": 6.101638374817095,
"grad_norm": 3.0759158107684925e-05,
"learning_rate": 4.3316214431099785e-08,
"loss": 0.0,
"step": 50300
},
{
"epoch": 6.113768868604008,
"grad_norm": 3.788939648075029e-05,
"learning_rate": 4.3181419925323844e-08,
"loss": 0.0,
"step": 50400
},
{
"epoch": 6.12589936239092,
"grad_norm": 2.2543508748640306e-05,
"learning_rate": 4.30466254195479e-08,
"loss": 0.0,
"step": 50500
},
{
"epoch": 6.138029856177833,
"grad_norm": 1.6207653970923275e-05,
"learning_rate": 4.291183091377195e-08,
"loss": 0.0,
"step": 50600
},
{
"epoch": 6.150160349964746,
"grad_norm": 6.610866421397077e-06,
"learning_rate": 4.277703640799601e-08,
"loss": 0.0,
"step": 50700
},
{
"epoch": 6.162290843751658,
"grad_norm": 1.5873067241045646e-05,
"learning_rate": 4.264224190222007e-08,
"loss": 0.0,
"step": 50800
},
{
"epoch": 6.174421337538571,
"grad_norm": 6.631801625189837e-06,
"learning_rate": 4.250744739644412e-08,
"loss": 0.0,
"step": 50900
},
{
"epoch": 6.186551831325484,
"grad_norm": 1.6326004697475582e-05,
"learning_rate": 4.2372652890668174e-08,
"loss": 0.0,
"step": 51000
},
{
"epoch": 6.198682325112396,
"grad_norm": 1.117045758292079e-05,
"learning_rate": 4.2237858384892226e-08,
"loss": 0.0,
"step": 51100
},
{
"epoch": 6.210812818899309,
"grad_norm": 7.856343472667504e-06,
"learning_rate": 4.2103063879116286e-08,
"loss": 0.0,
"step": 51200
},
{
"epoch": 6.222943312686222,
"grad_norm": 2.9008153433096595e-05,
"learning_rate": 4.1968269373340345e-08,
"loss": 0.0,
"step": 51300
},
{
"epoch": 6.235073806473134,
"grad_norm": 5.562766091316007e-06,
"learning_rate": 4.18334748675644e-08,
"loss": 0.0,
"step": 51400
},
{
"epoch": 6.247204300260048,
"grad_norm": 6.107033186708577e-06,
"learning_rate": 4.169868036178845e-08,
"loss": 0.0,
"step": 51500
},
{
"epoch": 6.259334794046961,
"grad_norm": 7.988614015630446e-06,
"learning_rate": 4.156388585601251e-08,
"loss": 0.0,
"step": 51600
},
{
"epoch": 6.271465287833873,
"grad_norm": 2.7623522328212857e-05,
"learning_rate": 4.142909135023656e-08,
"loss": 0.0,
"step": 51700
},
{
"epoch": 6.283595781620786,
"grad_norm": 3.2249143259832636e-05,
"learning_rate": 4.129429684446062e-08,
"loss": 0.0,
"step": 51800
},
{
"epoch": 6.295726275407699,
"grad_norm": 7.830405593267642e-06,
"learning_rate": 4.115950233868467e-08,
"loss": 0.0,
"step": 51900
},
{
"epoch": 6.307856769194611,
"grad_norm": 2.494780892448034e-05,
"learning_rate": 4.102470783290873e-08,
"loss": 0.0,
"step": 52000
},
{
"epoch": 6.319987262981524,
"grad_norm": 3.023298086191062e-05,
"learning_rate": 4.0889913327132786e-08,
"loss": 0.0,
"step": 52100
},
{
"epoch": 6.332117756768437,
"grad_norm": 1.9346003682585433e-05,
"learning_rate": 4.075511882135684e-08,
"loss": 0.0,
"step": 52200
},
{
"epoch": 6.344248250555349,
"grad_norm": 2.7946347472607158e-05,
"learning_rate": 4.06203243155809e-08,
"loss": 0.0,
"step": 52300
},
{
"epoch": 6.356378744342262,
"grad_norm": 1.0151994501939043e-05,
"learning_rate": 4.048552980980495e-08,
"loss": 0.0,
"step": 52400
},
{
"epoch": 6.368509238129175,
"grad_norm": 2.9524355340981856e-05,
"learning_rate": 4.0350735304029003e-08,
"loss": 0.0,
"step": 52500
},
{
"epoch": 6.380639731916087,
"grad_norm": 6.609189313167008e-06,
"learning_rate": 4.021594079825306e-08,
"loss": 0.0,
"step": 52600
},
{
"epoch": 6.392770225703,
"grad_norm": 1.3056687748758122e-05,
"learning_rate": 4.008114629247712e-08,
"loss": 0.0,
"step": 52700
},
{
"epoch": 6.404900719489913,
"grad_norm": 4.907285529043293e-06,
"learning_rate": 3.994635178670117e-08,
"loss": 0.0,
"step": 52800
},
{
"epoch": 6.417031213276825,
"grad_norm": 1.0377465514466166e-05,
"learning_rate": 3.981155728092523e-08,
"loss": 0.0,
"step": 52900
},
{
"epoch": 6.429161707063738,
"grad_norm": 1.003842680802336e-05,
"learning_rate": 3.967676277514928e-08,
"loss": 0.0,
"step": 53000
},
{
"epoch": 6.441292200850651,
"grad_norm": 7.876193194533698e-06,
"learning_rate": 3.954196826937334e-08,
"loss": 0.0,
"step": 53100
},
{
"epoch": 6.453422694637563,
"grad_norm": 4.641383384296205e-06,
"learning_rate": 3.94071737635974e-08,
"loss": 0.0,
"step": 53200
},
{
"epoch": 6.465553188424476,
"grad_norm": 5.234006493992638e-06,
"learning_rate": 3.9272379257821445e-08,
"loss": 0.0,
"step": 53300
},
{
"epoch": 6.477683682211389,
"grad_norm": 6.524358468595892e-06,
"learning_rate": 3.9137584752045504e-08,
"loss": 0.0,
"step": 53400
},
{
"epoch": 6.489814175998302,
"grad_norm": 2.1887204638915136e-05,
"learning_rate": 3.900279024626956e-08,
"loss": 0.0,
"step": 53500
},
{
"epoch": 6.501944669785215,
"grad_norm": 8.637114660814404e-06,
"learning_rate": 3.8867995740493616e-08,
"loss": 0.0,
"step": 53600
},
{
"epoch": 6.5140751635721275,
"grad_norm": 8.20979676063871e-06,
"learning_rate": 3.8733201234717675e-08,
"loss": 0.0,
"step": 53700
},
{
"epoch": 6.52620565735904,
"grad_norm": 4.2486276470299345e-06,
"learning_rate": 3.859840672894173e-08,
"loss": 0.0,
"step": 53800
},
{
"epoch": 6.538336151145953,
"grad_norm": 1.3307449080457445e-05,
"learning_rate": 3.846361222316578e-08,
"loss": 0.0,
"step": 53900
},
{
"epoch": 6.5504666449328655,
"grad_norm": 5.706210231437581e-06,
"learning_rate": 3.832881771738984e-08,
"loss": 0.0,
"step": 54000
},
{
"epoch": 6.562597138719778,
"grad_norm": 1.1756884305214044e-05,
"learning_rate": 3.819402321161389e-08,
"loss": 0.0,
"step": 54100
},
{
"epoch": 6.574727632506691,
"grad_norm": 6.582447895198129e-06,
"learning_rate": 3.8059228705837945e-08,
"loss": 0.0,
"step": 54200
},
{
"epoch": 6.5868581262936035,
"grad_norm": 1.4298544556368142e-05,
"learning_rate": 3.7924434200062004e-08,
"loss": 0.0,
"step": 54300
},
{
"epoch": 6.598988620080516,
"grad_norm": 1.3964708159619477e-05,
"learning_rate": 3.778963969428606e-08,
"loss": 0.0,
"step": 54400
},
{
"epoch": 6.611119113867429,
"grad_norm": 9.269981092074886e-06,
"learning_rate": 3.7654845188510116e-08,
"loss": 0.0,
"step": 54500
},
{
"epoch": 6.6232496076543415,
"grad_norm": 1.1989985978289042e-05,
"learning_rate": 3.7520050682734175e-08,
"loss": 0.0,
"step": 54600
},
{
"epoch": 6.635380101441254,
"grad_norm": 1.2308242730796337e-05,
"learning_rate": 3.738525617695822e-08,
"loss": 0.0,
"step": 54700
},
{
"epoch": 6.647510595228167,
"grad_norm": 1.537953721708618e-05,
"learning_rate": 3.725046167118228e-08,
"loss": 0.0,
"step": 54800
},
{
"epoch": 6.6596410890150795,
"grad_norm": 1.1343098776706029e-05,
"learning_rate": 3.711566716540634e-08,
"loss": 0.0,
"step": 54900
},
{
"epoch": 6.671771582801992,
"grad_norm": 1.3526327165891416e-05,
"learning_rate": 3.698087265963039e-08,
"loss": 0.0,
"step": 55000
},
{
"epoch": 6.683902076588905,
"grad_norm": 2.0161125576123595e-05,
"learning_rate": 3.6846078153854445e-08,
"loss": 0.0,
"step": 55100
},
{
"epoch": 6.696032570375818,
"grad_norm": 6.1126788750698324e-06,
"learning_rate": 3.67112836480785e-08,
"loss": 0.0,
"step": 55200
},
{
"epoch": 6.708163064162731,
"grad_norm": 4.200906005280558e-06,
"learning_rate": 3.657648914230256e-08,
"loss": 0.0,
"step": 55300
},
{
"epoch": 6.720293557949644,
"grad_norm": 4.649086349672871e-06,
"learning_rate": 3.6441694636526617e-08,
"loss": 0.0,
"step": 55400
},
{
"epoch": 6.732424051736556,
"grad_norm": 8.526422789145727e-06,
"learning_rate": 3.630690013075067e-08,
"loss": 0.0,
"step": 55500
},
{
"epoch": 6.744554545523469,
"grad_norm": 7.695515705563594e-06,
"learning_rate": 3.617210562497472e-08,
"loss": 0.0,
"step": 55600
},
{
"epoch": 6.756685039310382,
"grad_norm": 7.190765245468356e-06,
"learning_rate": 3.603731111919878e-08,
"loss": 0.0,
"step": 55700
},
{
"epoch": 6.768815533097294,
"grad_norm": 1.0542355994402897e-05,
"learning_rate": 3.5902516613422834e-08,
"loss": 0.0,
"step": 55800
},
{
"epoch": 6.780946026884207,
"grad_norm": 4.001772595074726e-06,
"learning_rate": 3.576772210764689e-08,
"loss": 0.0,
"step": 55900
},
{
"epoch": 6.79307652067112,
"grad_norm": 6.5062290559581015e-06,
"learning_rate": 3.5632927601870946e-08,
"loss": 0.0,
"step": 56000
},
{
"epoch": 6.805207014458032,
"grad_norm": 4.772533429786563e-06,
"learning_rate": 3.5498133096095e-08,
"loss": 0.0,
"step": 56100
},
{
"epoch": 6.817337508244945,
"grad_norm": 1.6134656107169576e-05,
"learning_rate": 3.536333859031906e-08,
"loss": 0.0,
"step": 56200
},
{
"epoch": 6.829468002031858,
"grad_norm": 5.383319603424752e-06,
"learning_rate": 3.522854408454311e-08,
"loss": 0.0,
"step": 56300
},
{
"epoch": 6.84159849581877,
"grad_norm": 6.041369488229975e-06,
"learning_rate": 3.509374957876717e-08,
"loss": 0.0,
"step": 56400
},
{
"epoch": 6.853728989605683,
"grad_norm": 4.105105290364008e-06,
"learning_rate": 3.495895507299122e-08,
"loss": 0.0,
"step": 56500
},
{
"epoch": 6.865859483392596,
"grad_norm": 7.601636298204539e-06,
"learning_rate": 3.4824160567215275e-08,
"loss": 0.0,
"step": 56600
},
{
"epoch": 6.877989977179508,
"grad_norm": 4.2275514715583995e-06,
"learning_rate": 3.4689366061439334e-08,
"loss": 0.0,
"step": 56700
},
{
"epoch": 6.890120470966421,
"grad_norm": 4.698940756497905e-06,
"learning_rate": 3.4554571555663394e-08,
"loss": 0.0,
"step": 56800
},
{
"epoch": 6.902250964753334,
"grad_norm": 1.0010462574427947e-05,
"learning_rate": 3.4419777049887446e-08,
"loss": 0.0,
"step": 56900
},
{
"epoch": 6.914381458540246,
"grad_norm": 5.1377683121245354e-06,
"learning_rate": 3.42849825441115e-08,
"loss": 0.0,
"step": 57000
},
{
"epoch": 6.926511952327159,
"grad_norm": 1.253222580999136e-05,
"learning_rate": 3.415018803833555e-08,
"loss": 0.0,
"step": 57100
},
{
"epoch": 6.938642446114072,
"grad_norm": 1.1987819561909419e-05,
"learning_rate": 3.401539353255961e-08,
"loss": 0.0,
"step": 57200
},
{
"epoch": 6.950772939900985,
"grad_norm": 7.4161894190183375e-06,
"learning_rate": 3.388059902678367e-08,
"loss": 0.0,
"step": 57300
},
{
"epoch": 6.962903433687898,
"grad_norm": 4.78677748105838e-06,
"learning_rate": 3.374580452100772e-08,
"loss": 0.0,
"step": 57400
},
{
"epoch": 6.9750339274748105,
"grad_norm": 1.2344567949185148e-05,
"learning_rate": 3.3611010015231775e-08,
"loss": 0.0,
"step": 57500
},
{
"epoch": 6.987164421261723,
"grad_norm": 4.33678769695689e-06,
"learning_rate": 3.3476215509455835e-08,
"loss": 0.0,
"step": 57600
},
{
"epoch": 6.999294915048636,
"grad_norm": 1.138456991611747e-05,
"learning_rate": 3.334142100367989e-08,
"loss": 0.0,
"step": 57700
},
{
"epoch": 7.0114254088355485,
"grad_norm": 1.339143818768207e-05,
"learning_rate": 3.3207974442961703e-08,
"loss": 0.0,
"step": 57800
},
{
"epoch": 7.023555902622461,
"grad_norm": 7.87022327131126e-06,
"learning_rate": 3.3073179937185756e-08,
"loss": 0.0,
"step": 57900
},
{
"epoch": 7.035686396409374,
"grad_norm": 4.470041858439799e-06,
"learning_rate": 3.2938385431409815e-08,
"loss": 0.0,
"step": 58000
},
{
"epoch": 7.0478168901962865,
"grad_norm": 5.98327596890158e-06,
"learning_rate": 3.280359092563387e-08,
"loss": 0.0,
"step": 58100
},
{
"epoch": 7.059947383983199,
"grad_norm": 6.159062650112901e-06,
"learning_rate": 3.266879641985793e-08,
"loss": 0.0,
"step": 58200
},
{
"epoch": 7.072077877770112,
"grad_norm": 1.162042190117063e-05,
"learning_rate": 3.253400191408198e-08,
"loss": 0.0,
"step": 58300
},
{
"epoch": 7.0842083715570245,
"grad_norm": 8.919217179936823e-06,
"learning_rate": 3.239920740830603e-08,
"loss": 0.0,
"step": 58400
},
{
"epoch": 7.096338865343937,
"grad_norm": 4.296573024475947e-06,
"learning_rate": 3.226441290253009e-08,
"loss": 0.0,
"step": 58500
},
{
"epoch": 7.10846935913085,
"grad_norm": 3.882557848555734e-06,
"learning_rate": 3.212961839675415e-08,
"loss": 0.0,
"step": 58600
},
{
"epoch": 7.1205998529177625,
"grad_norm": 1.2772562513418961e-05,
"learning_rate": 3.1994823890978204e-08,
"loss": 0.0,
"step": 58700
},
{
"epoch": 7.132730346704675,
"grad_norm": 1.5256729057000484e-05,
"learning_rate": 3.1860029385202257e-08,
"loss": 0.0,
"step": 58800
},
{
"epoch": 7.144860840491588,
"grad_norm": 1.280256674363045e-05,
"learning_rate": 3.1725234879426316e-08,
"loss": 0.0,
"step": 58900
},
{
"epoch": 7.156991334278501,
"grad_norm": 1.1524730325618293e-05,
"learning_rate": 3.159044037365037e-08,
"loss": 0.0,
"step": 59000
},
{
"epoch": 7.169121828065414,
"grad_norm": 4.267490567144705e-06,
"learning_rate": 3.145564586787443e-08,
"loss": 0.0,
"step": 59100
},
{
"epoch": 7.181252321852327,
"grad_norm": 5.201383373787394e-06,
"learning_rate": 3.132085136209848e-08,
"loss": 0.0,
"step": 59200
},
{
"epoch": 7.193382815639239,
"grad_norm": 1.4003146134200506e-05,
"learning_rate": 3.118605685632253e-08,
"loss": 0.0,
"step": 59300
},
{
"epoch": 7.205513309426152,
"grad_norm": 1.0509905223443639e-05,
"learning_rate": 3.105126235054659e-08,
"loss": 0.0,
"step": 59400
},
{
"epoch": 7.217643803213065,
"grad_norm": 8.109111149678938e-06,
"learning_rate": 3.0916467844770645e-08,
"loss": 0.0,
"step": 59500
},
{
"epoch": 7.229774296999977,
"grad_norm": 5.669727670465363e-06,
"learning_rate": 3.0781673338994704e-08,
"loss": 0.0,
"step": 59600
},
{
"epoch": 7.24190479078689,
"grad_norm": 4.445894774107728e-06,
"learning_rate": 3.064687883321876e-08,
"loss": 0.0,
"step": 59700
},
{
"epoch": 7.254035284573803,
"grad_norm": 4.052724307257449e-06,
"learning_rate": 3.051208432744281e-08,
"loss": 0.0,
"step": 59800
},
{
"epoch": 7.266165778360715,
"grad_norm": 1.6403826521127485e-05,
"learning_rate": 3.037728982166687e-08,
"loss": 0.0,
"step": 59900
},
{
"epoch": 7.278296272147628,
"grad_norm": 3.227706656616647e-06,
"learning_rate": 3.024249531589092e-08,
"loss": 0.0,
"step": 60000
},
{
"epoch": 7.278296272147628,
"eval_loss": 5.67662823414139e-08,
"eval_runtime": 12876.5575,
"eval_samples_per_second": 32.778,
"eval_steps_per_second": 4.097,
"step": 60000
},
{
"epoch": 7.290426765934541,
"grad_norm": 5.48078151041409e-06,
"learning_rate": 3.010770081011498e-08,
"loss": 0.0,
"step": 60100
},
{
"epoch": 7.302557259721453,
"grad_norm": 6.255341304495232e-06,
"learning_rate": 2.9972906304339034e-08,
"loss": 0.0,
"step": 60200
},
{
"epoch": 7.314687753508366,
"grad_norm": 7.36140327717294e-06,
"learning_rate": 2.9838111798563086e-08,
"loss": 0.0,
"step": 60300
},
{
"epoch": 7.326818247295279,
"grad_norm": 1.1941012417082675e-05,
"learning_rate": 2.9703317292787145e-08,
"loss": 0.0,
"step": 60400
},
{
"epoch": 7.338948741082191,
"grad_norm": 1.1292297131149098e-05,
"learning_rate": 2.95685227870112e-08,
"loss": 0.0,
"step": 60500
},
{
"epoch": 7.351079234869104,
"grad_norm": 3.3545572932780487e-06,
"learning_rate": 2.9433728281235254e-08,
"loss": 0.0,
"step": 60600
},
{
"epoch": 7.363209728656017,
"grad_norm": 1.2666420843743253e-05,
"learning_rate": 2.929893377545931e-08,
"loss": 0.0,
"step": 60700
},
{
"epoch": 7.375340222442929,
"grad_norm": 1.1807159353338648e-05,
"learning_rate": 2.916413926968337e-08,
"loss": 0.0,
"step": 60800
},
{
"epoch": 7.387470716229842,
"grad_norm": 1.095084189728368e-05,
"learning_rate": 2.9029344763907422e-08,
"loss": 0.0,
"step": 60900
},
{
"epoch": 7.399601210016756,
"grad_norm": 9.25906715565361e-06,
"learning_rate": 2.8894550258131478e-08,
"loss": 0.0,
"step": 61000
},
{
"epoch": 7.411731703803668,
"grad_norm": 7.390953214780893e-06,
"learning_rate": 2.875975575235553e-08,
"loss": 0.0,
"step": 61100
},
{
"epoch": 7.423862197590581,
"grad_norm": 6.920663963683182e-06,
"learning_rate": 2.8624961246579587e-08,
"loss": 0.0,
"step": 61200
},
{
"epoch": 7.435992691377494,
"grad_norm": 7.917548828118015e-06,
"learning_rate": 2.8490166740803646e-08,
"loss": 0.0,
"step": 61300
},
{
"epoch": 7.448123185164406,
"grad_norm": 5.376084118324798e-06,
"learning_rate": 2.83553722350277e-08,
"loss": 0.0,
"step": 61400
},
{
"epoch": 7.460253678951319,
"grad_norm": 4.198305305180838e-06,
"learning_rate": 2.8220577729251755e-08,
"loss": 0.0,
"step": 61500
},
{
"epoch": 7.472384172738232,
"grad_norm": 4.916752004646696e-06,
"learning_rate": 2.808578322347581e-08,
"loss": 0.0,
"step": 61600
},
{
"epoch": 7.484514666525144,
"grad_norm": 1.2863613847002853e-05,
"learning_rate": 2.7950988717699863e-08,
"loss": 0.0,
"step": 61700
},
{
"epoch": 7.496645160312057,
"grad_norm": 1.0277097317157313e-05,
"learning_rate": 2.7817542156981682e-08,
"loss": 0.0,
"step": 61800
},
{
"epoch": 7.50877565409897,
"grad_norm": 1.250240620720433e-05,
"learning_rate": 2.7682747651205735e-08,
"loss": 0.0,
"step": 61900
},
{
"epoch": 7.520906147885882,
"grad_norm": 9.138646419160068e-06,
"learning_rate": 2.754795314542979e-08,
"loss": 0.0,
"step": 62000
},
{
"epoch": 7.533036641672795,
"grad_norm": 7.881306373747066e-06,
"learning_rate": 2.7413158639653844e-08,
"loss": 0.0,
"step": 62100
},
{
"epoch": 7.545167135459708,
"grad_norm": 8.445715138805099e-06,
"learning_rate": 2.72783641338779e-08,
"loss": 0.0,
"step": 62200
},
{
"epoch": 7.55729762924662,
"grad_norm": 3.7977299598423997e-06,
"learning_rate": 2.714356962810196e-08,
"loss": 0.0,
"step": 62300
},
{
"epoch": 7.569428123033533,
"grad_norm": 5.859428711119108e-06,
"learning_rate": 2.7008775122326012e-08,
"loss": 0.0,
"step": 62400
},
{
"epoch": 7.581558616820446,
"grad_norm": 3.878424195136176e-06,
"learning_rate": 2.6873980616550068e-08,
"loss": 0.0,
"step": 62500
},
{
"epoch": 7.593689110607358,
"grad_norm": 6.818550446041627e-06,
"learning_rate": 2.6739186110774127e-08,
"loss": 0.0,
"step": 62600
},
{
"epoch": 7.605819604394272,
"grad_norm": 5.862772013642825e-06,
"learning_rate": 2.6604391604998176e-08,
"loss": 0.0,
"step": 62700
},
{
"epoch": 7.6179500981811845,
"grad_norm": 9.112359293794725e-06,
"learning_rate": 2.6469597099222236e-08,
"loss": 0.0,
"step": 62800
},
{
"epoch": 7.630080591968097,
"grad_norm": 6.844359177193837e-06,
"learning_rate": 2.633480259344629e-08,
"loss": 0.0,
"step": 62900
},
{
"epoch": 7.64221108575501,
"grad_norm": 1.056177916325396e-05,
"learning_rate": 2.6200008087670344e-08,
"loss": 0.0,
"step": 63000
},
{
"epoch": 7.6543415795419225,
"grad_norm": 1.6292913642246276e-05,
"learning_rate": 2.6065213581894403e-08,
"loss": 0.0,
"step": 63100
},
{
"epoch": 7.666472073328835,
"grad_norm": 7.939475835883059e-06,
"learning_rate": 2.5930419076118453e-08,
"loss": 0.0,
"step": 63200
},
{
"epoch": 7.678602567115748,
"grad_norm": 1.4974369150877465e-05,
"learning_rate": 2.5795624570342512e-08,
"loss": 0.0,
"step": 63300
},
{
"epoch": 7.6907330609026605,
"grad_norm": 5.687543762178393e-06,
"learning_rate": 2.5660830064566568e-08,
"loss": 0.0,
"step": 63400
},
{
"epoch": 7.702863554689573,
"grad_norm": 8.041168257477693e-06,
"learning_rate": 2.552603555879062e-08,
"loss": 0.0,
"step": 63500
},
{
"epoch": 7.714994048476486,
"grad_norm": 9.756033250596374e-06,
"learning_rate": 2.5391241053014677e-08,
"loss": 0.0,
"step": 63600
},
{
"epoch": 7.7271245422633985,
"grad_norm": 8.254312888311688e-06,
"learning_rate": 2.5256446547238736e-08,
"loss": 0.0,
"step": 63700
},
{
"epoch": 7.739255036050311,
"grad_norm": 8.304762559419032e-06,
"learning_rate": 2.512165204146279e-08,
"loss": 0.0,
"step": 63800
},
{
"epoch": 7.751385529837224,
"grad_norm": 9.369220606458839e-06,
"learning_rate": 2.4988205480744605e-08,
"loss": 0.0,
"step": 63900
},
{
"epoch": 7.7635160236241365,
"grad_norm": 4.313039880798897e-06,
"learning_rate": 2.4853410974968657e-08,
"loss": 0.0,
"step": 64000
},
{
"epoch": 7.775646517411049,
"grad_norm": 1.0110463335877284e-05,
"learning_rate": 2.4718616469192717e-08,
"loss": 0.0,
"step": 64100
},
{
"epoch": 7.787777011197962,
"grad_norm": 5.044106728746556e-06,
"learning_rate": 2.458382196341677e-08,
"loss": 0.0,
"step": 64200
},
{
"epoch": 7.7999075049848745,
"grad_norm": 1.0012697202910203e-05,
"learning_rate": 2.4449027457640825e-08,
"loss": 0.0,
"step": 64300
},
{
"epoch": 7.812037998771787,
"grad_norm": 7.5458015089679975e-06,
"learning_rate": 2.431423295186488e-08,
"loss": 0.0,
"step": 64400
},
{
"epoch": 7.8241684925587,
"grad_norm": 6.187547569425078e-06,
"learning_rate": 2.4179438446088937e-08,
"loss": 0.0,
"step": 64500
},
{
"epoch": 7.8362989863456125,
"grad_norm": 6.914489858900197e-06,
"learning_rate": 2.4044643940312993e-08,
"loss": 0.0,
"step": 64600
},
{
"epoch": 7.848429480132525,
"grad_norm": 7.281971647898899e-06,
"learning_rate": 2.3909849434537046e-08,
"loss": 0.0,
"step": 64700
},
{
"epoch": 7.860559973919439,
"grad_norm": 1.0793314686452504e-05,
"learning_rate": 2.3775054928761102e-08,
"loss": 0.0,
"step": 64800
},
{
"epoch": 7.872690467706351,
"grad_norm": 8.61198077473091e-06,
"learning_rate": 2.3640260422985158e-08,
"loss": 0.0,
"step": 64900
},
{
"epoch": 7.884820961493264,
"grad_norm": 7.743517926428467e-06,
"learning_rate": 2.3505465917209214e-08,
"loss": 0.0,
"step": 65000
},
{
"epoch": 7.896951455280177,
"grad_norm": 6.104731710365741e-06,
"learning_rate": 2.337067141143327e-08,
"loss": 0.0,
"step": 65100
},
{
"epoch": 7.909081949067089,
"grad_norm": 1.2164388863311615e-05,
"learning_rate": 2.3235876905657326e-08,
"loss": 0.0,
"step": 65200
},
{
"epoch": 7.921212442854002,
"grad_norm": 8.761631761444733e-06,
"learning_rate": 2.310108239988138e-08,
"loss": 0.0,
"step": 65300
},
{
"epoch": 7.933342936640915,
"grad_norm": 5.0737071433104575e-06,
"learning_rate": 2.2966287894105434e-08,
"loss": 0.0,
"step": 65400
},
{
"epoch": 7.945473430427827,
"grad_norm": 1.1595148862397764e-05,
"learning_rate": 2.283149338832949e-08,
"loss": 0.0,
"step": 65500
},
{
"epoch": 7.95760392421474,
"grad_norm": 1.36161434056703e-05,
"learning_rate": 2.2696698882553546e-08,
"loss": 0.0,
"step": 65600
},
{
"epoch": 7.969734418001653,
"grad_norm": 5.490317107614828e-06,
"learning_rate": 2.2561904376777602e-08,
"loss": 0.0,
"step": 65700
},
{
"epoch": 7.981864911788565,
"grad_norm": 3.685940328068682e-06,
"learning_rate": 2.2427109871001655e-08,
"loss": 0.0,
"step": 65800
},
{
"epoch": 7.993995405575478,
"grad_norm": 3.629038928920636e-06,
"learning_rate": 2.2293663310283474e-08,
"loss": 0.0,
"step": 65900
},
{
"epoch": 8.006125899362392,
"grad_norm": 7.837747943995055e-06,
"learning_rate": 2.2160216749565287e-08,
"loss": 0.0,
"step": 66000
},
{
"epoch": 8.018256393149304,
"grad_norm": 5.70934435017989e-06,
"learning_rate": 2.2025422243789343e-08,
"loss": 0.0,
"step": 66100
},
{
"epoch": 8.030386886936217,
"grad_norm": 5.7746415222936776e-06,
"learning_rate": 2.18906277380134e-08,
"loss": 0.0,
"step": 66200
},
{
"epoch": 8.04251738072313,
"grad_norm": 3.810003818216501e-06,
"learning_rate": 2.175583323223745e-08,
"loss": 0.0,
"step": 66300
},
{
"epoch": 8.054647874510042,
"grad_norm": 1.444386816729093e-05,
"learning_rate": 2.1621038726461507e-08,
"loss": 0.0,
"step": 66400
},
{
"epoch": 8.066778368296955,
"grad_norm": 9.787572707864456e-06,
"learning_rate": 2.1486244220685563e-08,
"loss": 0.0,
"step": 66500
},
{
"epoch": 8.078908862083868,
"grad_norm": 6.40834969090065e-06,
"learning_rate": 2.135144971490962e-08,
"loss": 0.0,
"step": 66600
},
{
"epoch": 8.09103935587078,
"grad_norm": 1.2682895430771168e-05,
"learning_rate": 2.1216655209133675e-08,
"loss": 0.0,
"step": 66700
},
{
"epoch": 8.103169849657693,
"grad_norm": 1.2579374015331268e-05,
"learning_rate": 2.1081860703357728e-08,
"loss": 0.0,
"step": 66800
},
{
"epoch": 8.115300343444606,
"grad_norm": 6.404700343409786e-06,
"learning_rate": 2.0947066197581787e-08,
"loss": 0.0,
"step": 66900
},
{
"epoch": 8.127430837231518,
"grad_norm": 5.5169152801681776e-06,
"learning_rate": 2.081227169180584e-08,
"loss": 0.0,
"step": 67000
},
{
"epoch": 8.139561331018431,
"grad_norm": 8.979187441582326e-06,
"learning_rate": 2.0677477186029896e-08,
"loss": 0.0,
"step": 67100
},
{
"epoch": 8.151691824805344,
"grad_norm": 1.1302987331873737e-05,
"learning_rate": 2.0542682680253952e-08,
"loss": 0.0,
"step": 67200
},
{
"epoch": 8.163822318592256,
"grad_norm": 1.4383387679117732e-05,
"learning_rate": 2.0407888174478008e-08,
"loss": 0.0,
"step": 67300
},
{
"epoch": 8.175952812379169,
"grad_norm": 1.563242585689295e-05,
"learning_rate": 2.0273093668702064e-08,
"loss": 0.0,
"step": 67400
},
{
"epoch": 8.188083306166082,
"grad_norm": 6.9619700298062526e-06,
"learning_rate": 2.0138299162926116e-08,
"loss": 0.0,
"step": 67500
},
{
"epoch": 8.200213799952994,
"grad_norm": 1.1879750672960654e-05,
"learning_rate": 2.0003504657150176e-08,
"loss": 0.0,
"step": 67600
},
{
"epoch": 8.212344293739907,
"grad_norm": 7.085599918355001e-06,
"learning_rate": 1.986871015137423e-08,
"loss": 0.0,
"step": 67700
},
{
"epoch": 8.22447478752682,
"grad_norm": 1.2863686606578995e-05,
"learning_rate": 1.9733915645598284e-08,
"loss": 0.0,
"step": 67800
},
{
"epoch": 8.236605281313732,
"grad_norm": 9.520225830783602e-06,
"learning_rate": 1.959912113982234e-08,
"loss": 0.0,
"step": 67900
},
{
"epoch": 8.248735775100645,
"grad_norm": 1.3260582818475086e-05,
"learning_rate": 1.9464326634046396e-08,
"loss": 0.0,
"step": 68000
},
{
"epoch": 8.260866268887558,
"grad_norm": 9.987468729377724e-06,
"learning_rate": 1.9329532128270452e-08,
"loss": 0.0,
"step": 68100
},
{
"epoch": 8.27299676267447,
"grad_norm": 1.1559543054318056e-05,
"learning_rate": 1.9194737622494505e-08,
"loss": 0.0,
"step": 68200
},
{
"epoch": 8.285127256461383,
"grad_norm": 7.925504178274423e-06,
"learning_rate": 1.905994311671856e-08,
"loss": 0.0,
"step": 68300
},
{
"epoch": 8.297257750248296,
"grad_norm": 4.772101874550572e-06,
"learning_rate": 1.8925148610942617e-08,
"loss": 0.0,
"step": 68400
},
{
"epoch": 8.309388244035208,
"grad_norm": 9.931142813002225e-06,
"learning_rate": 1.8790354105166673e-08,
"loss": 0.0,
"step": 68500
},
{
"epoch": 8.321518737822121,
"grad_norm": 9.558188139635604e-06,
"learning_rate": 1.8655559599390726e-08,
"loss": 0.0,
"step": 68600
},
{
"epoch": 8.333649231609034,
"grad_norm": 5.009603682992747e-06,
"learning_rate": 1.8520765093614785e-08,
"loss": 0.0,
"step": 68700
},
{
"epoch": 8.345779725395946,
"grad_norm": 1.3090863831166644e-05,
"learning_rate": 1.838597058783884e-08,
"loss": 0.0,
"step": 68800
},
{
"epoch": 8.357910219182859,
"grad_norm": 1.2672931916313246e-05,
"learning_rate": 1.8252524027120653e-08,
"loss": 0.0,
"step": 68900
},
{
"epoch": 8.370040712969772,
"grad_norm": 1.1767956493713427e-05,
"learning_rate": 1.811772952134471e-08,
"loss": 0.0,
"step": 69000
},
{
"epoch": 8.382171206756684,
"grad_norm": 1.4513515452563297e-05,
"learning_rate": 1.7982935015568765e-08,
"loss": 0.0,
"step": 69100
},
{
"epoch": 8.394301700543597,
"grad_norm": 4.3318741518305615e-06,
"learning_rate": 1.7848140509792818e-08,
"loss": 0.0,
"step": 69200
},
{
"epoch": 8.40643219433051,
"grad_norm": 1.4126319911156315e-05,
"learning_rate": 1.7713346004016874e-08,
"loss": 0.0,
"step": 69300
},
{
"epoch": 8.418562688117424,
"grad_norm": 9.831867828324903e-06,
"learning_rate": 1.757855149824093e-08,
"loss": 0.0,
"step": 69400
},
{
"epoch": 8.430693181904337,
"grad_norm": 1.695728860795498e-05,
"learning_rate": 1.7443756992464986e-08,
"loss": 0.0,
"step": 69500
},
{
"epoch": 8.44282367569125,
"grad_norm": 5.640126801154111e-06,
"learning_rate": 1.7308962486689042e-08,
"loss": 0.0,
"step": 69600
},
{
"epoch": 8.454954169478162,
"grad_norm": 9.565230357111432e-06,
"learning_rate": 1.7174167980913098e-08,
"loss": 0.0,
"step": 69700
},
{
"epoch": 8.467084663265075,
"grad_norm": 4.890798209089553e-06,
"learning_rate": 1.7039373475137154e-08,
"loss": 0.0,
"step": 69800
},
{
"epoch": 8.479215157051987,
"grad_norm": 3.091217422479531e-06,
"learning_rate": 1.6904578969361207e-08,
"loss": 0.0,
"step": 69900
},
{
"epoch": 8.4913456508389,
"grad_norm": 3.491951474643429e-06,
"learning_rate": 1.6769784463585263e-08,
"loss": 0.0,
"step": 70000
},
{
"epoch": 8.4913456508389,
"eval_loss": 5.676630721040965e-08,
"eval_runtime": 12832.5783,
"eval_samples_per_second": 32.891,
"eval_steps_per_second": 4.111,
"step": 70000
},
{
"epoch": 8.503476144625813,
"grad_norm": 1.0302742339263204e-05,
"learning_rate": 1.663498995780932e-08,
"loss": 0.0,
"step": 70100
},
{
"epoch": 8.515606638412725,
"grad_norm": 6.76013269185205e-06,
"learning_rate": 1.6500195452033375e-08,
"loss": 0.0,
"step": 70200
},
{
"epoch": 8.527737132199638,
"grad_norm": 4.191508651274489e-06,
"learning_rate": 1.636540094625743e-08,
"loss": 0.0,
"step": 70300
},
{
"epoch": 8.53986762598655,
"grad_norm": 9.216681064572185e-06,
"learning_rate": 1.6230606440481483e-08,
"loss": 0.0,
"step": 70400
},
{
"epoch": 8.551998119773463,
"grad_norm": 1.008288290904602e-05,
"learning_rate": 1.6095811934705542e-08,
"loss": 0.0,
"step": 70500
},
{
"epoch": 8.564128613560376,
"grad_norm": 3.22926530316181e-06,
"learning_rate": 1.5961017428929595e-08,
"loss": 0.0,
"step": 70600
},
{
"epoch": 8.576259107347289,
"grad_norm": 4.7994108172133565e-06,
"learning_rate": 1.582622292315365e-08,
"loss": 0.0,
"step": 70700
},
{
"epoch": 8.588389601134201,
"grad_norm": 6.952599051146535e-06,
"learning_rate": 1.5691428417377707e-08,
"loss": 0.0,
"step": 70800
},
{
"epoch": 8.600520094921114,
"grad_norm": 3.6921103401255095e-06,
"learning_rate": 1.5556633911601763e-08,
"loss": 0.0,
"step": 70900
},
{
"epoch": 8.612650588708027,
"grad_norm": 8.551131941203494e-06,
"learning_rate": 1.542183940582582e-08,
"loss": 0.0,
"step": 71000
},
{
"epoch": 8.62478108249494,
"grad_norm": 5.6051849242066965e-06,
"learning_rate": 1.5288392845107635e-08,
"loss": 0.0,
"step": 71100
},
{
"epoch": 8.636911576281852,
"grad_norm": 4.959318630426424e-06,
"learning_rate": 1.5153598339331688e-08,
"loss": 0.0,
"step": 71200
},
{
"epoch": 8.649042070068765,
"grad_norm": 5.334877187124221e-06,
"learning_rate": 1.5018803833555744e-08,
"loss": 0.0,
"step": 71300
},
{
"epoch": 8.661172563855677,
"grad_norm": 9.519723789708223e-06,
"learning_rate": 1.4884009327779798e-08,
"loss": 0.0,
"step": 71400
},
{
"epoch": 8.67330305764259,
"grad_norm": 8.972290743258782e-06,
"learning_rate": 1.4749214822003856e-08,
"loss": 0.0,
"step": 71500
},
{
"epoch": 8.685433551429503,
"grad_norm": 9.876139301923104e-06,
"learning_rate": 1.461442031622791e-08,
"loss": 0.0,
"step": 71600
},
{
"epoch": 8.697564045216415,
"grad_norm": 5.264038918539882e-06,
"learning_rate": 1.4479625810451964e-08,
"loss": 0.0,
"step": 71700
},
{
"epoch": 8.709694539003328,
"grad_norm": 6.115679752838332e-06,
"learning_rate": 1.434483130467602e-08,
"loss": 0.0,
"step": 71800
},
{
"epoch": 8.72182503279024,
"grad_norm": 1.0554780601523817e-05,
"learning_rate": 1.4210036798900076e-08,
"loss": 0.0,
"step": 71900
},
{
"epoch": 8.733955526577153,
"grad_norm": 2.050689909083303e-05,
"learning_rate": 1.4075242293124132e-08,
"loss": 0.0,
"step": 72000
},
{
"epoch": 8.746086020364066,
"grad_norm": 4.232292212691391e-06,
"learning_rate": 1.3940447787348186e-08,
"loss": 0.0,
"step": 72100
},
{
"epoch": 8.758216514150979,
"grad_norm": 1.164754849014571e-05,
"learning_rate": 1.3805653281572244e-08,
"loss": 0.0,
"step": 72200
},
{
"epoch": 8.770347007937891,
"grad_norm": 7.335055670409929e-06,
"learning_rate": 1.3670858775796298e-08,
"loss": 0.0,
"step": 72300
},
{
"epoch": 8.782477501724804,
"grad_norm": 4.493634151003789e-06,
"learning_rate": 1.3536064270020353e-08,
"loss": 0.0,
"step": 72400
},
{
"epoch": 8.794607995511717,
"grad_norm": 6.525173830596032e-06,
"learning_rate": 1.3401269764244407e-08,
"loss": 0.0,
"step": 72500
},
{
"epoch": 8.80673848929863,
"grad_norm": 3.5145178571838187e-06,
"learning_rate": 1.3266475258468465e-08,
"loss": 0.0,
"step": 72600
},
{
"epoch": 8.818868983085542,
"grad_norm": 7.61439468988101e-06,
"learning_rate": 1.313168075269252e-08,
"loss": 0.0,
"step": 72700
},
{
"epoch": 8.830999476872455,
"grad_norm": 9.239704013452865e-06,
"learning_rate": 1.2996886246916575e-08,
"loss": 0.0,
"step": 72800
},
{
"epoch": 8.843129970659367,
"grad_norm": 5.969765879854094e-06,
"learning_rate": 1.286209174114063e-08,
"loss": 0.0,
"step": 72900
},
{
"epoch": 8.855260464446282,
"grad_norm": 1.1594494026212487e-05,
"learning_rate": 1.2727297235364687e-08,
"loss": 0.0,
"step": 73000
},
{
"epoch": 8.867390958233194,
"grad_norm": 7.431879112118622e-06,
"learning_rate": 1.2592502729588741e-08,
"loss": 0.0,
"step": 73100
},
{
"epoch": 8.879521452020107,
"grad_norm": 7.179428394010756e-06,
"learning_rate": 1.2457708223812795e-08,
"loss": 0.0,
"step": 73200
},
{
"epoch": 8.89165194580702,
"grad_norm": 7.79450510890456e-06,
"learning_rate": 1.2322913718036851e-08,
"loss": 0.0,
"step": 73300
},
{
"epoch": 8.903782439593932,
"grad_norm": 7.0768387558928225e-06,
"learning_rate": 1.2188119212260909e-08,
"loss": 0.0,
"step": 73400
},
{
"epoch": 8.915912933380845,
"grad_norm": 9.209243216901086e-06,
"learning_rate": 1.2053324706484963e-08,
"loss": 0.0,
"step": 73500
},
{
"epoch": 8.928043427167758,
"grad_norm": 6.6513057390693575e-06,
"learning_rate": 1.191853020070902e-08,
"loss": 0.0,
"step": 73600
},
{
"epoch": 8.94017392095467,
"grad_norm": 4.44849638370215e-06,
"learning_rate": 1.1783735694933074e-08,
"loss": 0.0,
"step": 73700
},
{
"epoch": 8.952304414741583,
"grad_norm": 3.2620855563436635e-06,
"learning_rate": 1.164894118915713e-08,
"loss": 0.0,
"step": 73800
},
{
"epoch": 8.964434908528496,
"grad_norm": 8.767606232140679e-06,
"learning_rate": 1.1514146683381184e-08,
"loss": 0.0,
"step": 73900
},
{
"epoch": 8.976565402315408,
"grad_norm": 4.0526033444621135e-06,
"learning_rate": 1.137935217760524e-08,
"loss": 0.0,
"step": 74000
},
{
"epoch": 8.988695896102321,
"grad_norm": 6.989345820329618e-06,
"learning_rate": 1.1244557671829296e-08,
"loss": 0.0,
"step": 74100
},
{
"epoch": 9.000826389889234,
"grad_norm": 5.908458206249634e-06,
"learning_rate": 1.1109763166053352e-08,
"loss": 0.0,
"step": 74200
},
{
"epoch": 9.012956883676146,
"grad_norm": 3.4905781376437517e-06,
"learning_rate": 1.0974968660277406e-08,
"loss": 0.0,
"step": 74300
},
{
"epoch": 9.02508737746306,
"grad_norm": 9.163719369098544e-06,
"learning_rate": 1.0840174154501462e-08,
"loss": 0.0,
"step": 74400
},
{
"epoch": 9.037217871249972,
"grad_norm": 1.190317652799422e-05,
"learning_rate": 1.0705379648725517e-08,
"loss": 0.0,
"step": 74500
},
{
"epoch": 9.049348365036884,
"grad_norm": 7.547297627752414e-06,
"learning_rate": 1.0570585142949572e-08,
"loss": 0.0,
"step": 74600
},
{
"epoch": 9.061478858823797,
"grad_norm": 1.0041652785730548e-05,
"learning_rate": 1.0435790637173628e-08,
"loss": 0.0,
"step": 74700
},
{
"epoch": 9.07360935261071,
"grad_norm": 6.380136255756952e-06,
"learning_rate": 1.0300996131397684e-08,
"loss": 0.0,
"step": 74800
},
{
"epoch": 9.085739846397622,
"grad_norm": 1.0097122867591679e-05,
"learning_rate": 1.016620162562174e-08,
"loss": 0.0,
"step": 74900
},
{
"epoch": 9.097870340184535,
"grad_norm": 7.386913239315618e-06,
"learning_rate": 1.0031407119845795e-08,
"loss": 0.0,
"step": 75000
},
{
"epoch": 9.110000833971448,
"grad_norm": 1.3795511222269852e-05,
"learning_rate": 9.897960559127609e-09,
"loss": 0.0,
"step": 75100
},
{
"epoch": 9.12213132775836,
"grad_norm": 4.901587999484036e-06,
"learning_rate": 9.763166053351665e-09,
"loss": 0.0,
"step": 75200
},
{
"epoch": 9.134261821545273,
"grad_norm": 6.113995368650649e-06,
"learning_rate": 9.62837154757572e-09,
"loss": 0.0,
"step": 75300
},
{
"epoch": 9.146392315332186,
"grad_norm": 6.835588919784641e-06,
"learning_rate": 9.493577041799775e-09,
"loss": 0.0,
"step": 75400
},
{
"epoch": 9.158522809119098,
"grad_norm": 4.5478086576622445e-06,
"learning_rate": 9.358782536023831e-09,
"loss": 0.0,
"step": 75500
},
{
"epoch": 9.170653302906011,
"grad_norm": 7.497304068238009e-06,
"learning_rate": 9.223988030247887e-09,
"loss": 0.0,
"step": 75600
},
{
"epoch": 9.182783796692924,
"grad_norm": 6.143738119135378e-06,
"learning_rate": 9.089193524471942e-09,
"loss": 0.0,
"step": 75700
},
{
"epoch": 9.194914290479836,
"grad_norm": 7.741902663838118e-06,
"learning_rate": 8.954399018695998e-09,
"loss": 0.0,
"step": 75800
},
{
"epoch": 9.207044784266749,
"grad_norm": 3.56682039637235e-06,
"learning_rate": 8.819604512920053e-09,
"loss": 0.0,
"step": 75900
},
{
"epoch": 9.219175278053662,
"grad_norm": 3.6248711694497615e-06,
"learning_rate": 8.684810007144108e-09,
"loss": 0.0,
"step": 76000
},
{
"epoch": 9.231305771840574,
"grad_norm": 8.108417205221485e-06,
"learning_rate": 8.550015501368164e-09,
"loss": 0.0,
"step": 76100
},
{
"epoch": 9.243436265627487,
"grad_norm": 4.215437002130784e-06,
"learning_rate": 8.41522099559222e-09,
"loss": 0.0,
"step": 76200
},
{
"epoch": 9.2555667594144,
"grad_norm": 3.4193940336990636e-06,
"learning_rate": 8.280426489816276e-09,
"loss": 0.0,
"step": 76300
},
{
"epoch": 9.267697253201312,
"grad_norm": 6.604200279980432e-06,
"learning_rate": 8.14563198404033e-09,
"loss": 0.0,
"step": 76400
},
{
"epoch": 9.279827746988225,
"grad_norm": 1.4923987691872753e-05,
"learning_rate": 8.010837478264386e-09,
"loss": 0.0,
"step": 76500
},
{
"epoch": 9.291958240775138,
"grad_norm": 3.4648105611267965e-06,
"learning_rate": 7.87604297248844e-09,
"loss": 0.0,
"step": 76600
},
{
"epoch": 9.30408873456205,
"grad_norm": 4.845314379053889e-06,
"learning_rate": 7.741248466712496e-09,
"loss": 0.0,
"step": 76700
},
{
"epoch": 9.316219228348963,
"grad_norm": 1.45012054417748e-05,
"learning_rate": 7.60645396093655e-09,
"loss": 0.0,
"step": 76800
},
{
"epoch": 9.328349722135878,
"grad_norm": 9.72646193986293e-06,
"learning_rate": 7.471659455160607e-09,
"loss": 0.0,
"step": 76900
},
{
"epoch": 9.34048021592279,
"grad_norm": 1.138799598265905e-05,
"learning_rate": 7.336864949384662e-09,
"loss": 0.0,
"step": 77000
},
{
"epoch": 9.352610709709703,
"grad_norm": 1.1094990441051777e-05,
"learning_rate": 7.2020704436087185e-09,
"loss": 0.0,
"step": 77100
},
{
"epoch": 9.364741203496616,
"grad_norm": 5.859043994860258e-06,
"learning_rate": 7.068623882890533e-09,
"loss": 0.0,
"step": 77200
},
{
"epoch": 9.376871697283528,
"grad_norm": 5.342010808817577e-06,
"learning_rate": 6.933829377114589e-09,
"loss": 0.0,
"step": 77300
},
{
"epoch": 9.38900219107044,
"grad_norm": 1.1032276233891025e-05,
"learning_rate": 6.799034871338644e-09,
"loss": 0.0,
"step": 77400
},
{
"epoch": 9.401132684857354,
"grad_norm": 9.584249710314907e-06,
"learning_rate": 6.6642403655627e-09,
"loss": 0.0,
"step": 77500
},
{
"epoch": 9.413263178644266,
"grad_norm": 5.221731044002809e-06,
"learning_rate": 6.529445859786754e-09,
"loss": 0.0,
"step": 77600
},
{
"epoch": 9.425393672431179,
"grad_norm": 9.893785318126902e-06,
"learning_rate": 6.39465135401081e-09,
"loss": 0.0,
"step": 77700
},
{
"epoch": 9.437524166218092,
"grad_norm": 5.504544787982013e-06,
"learning_rate": 6.259856848234865e-09,
"loss": 0.0,
"step": 77800
},
{
"epoch": 9.449654660005004,
"grad_norm": 7.359417395491619e-06,
"learning_rate": 6.125062342458921e-09,
"loss": 0.0,
"step": 77900
},
{
"epoch": 9.461785153791917,
"grad_norm": 6.077909802115755e-06,
"learning_rate": 5.9902678366829765e-09,
"loss": 0.0,
"step": 78000
},
{
"epoch": 9.47391564757883,
"grad_norm": 6.336029855447123e-06,
"learning_rate": 5.855473330907032e-09,
"loss": 0.0,
"step": 78100
},
{
"epoch": 9.486046141365742,
"grad_norm": 1.0881180060096085e-05,
"learning_rate": 5.720678825131088e-09,
"loss": 0.0,
"step": 78200
},
{
"epoch": 9.498176635152655,
"grad_norm": 4.356296358309919e-06,
"learning_rate": 5.585884319355143e-09,
"loss": 0.0,
"step": 78300
},
{
"epoch": 9.510307128939568,
"grad_norm": 6.603406745853135e-06,
"learning_rate": 5.451089813579198e-09,
"loss": 0.0,
"step": 78400
},
{
"epoch": 9.52243762272648,
"grad_norm": 6.903046141815139e-06,
"learning_rate": 5.316295307803254e-09,
"loss": 0.0,
"step": 78500
},
{
"epoch": 9.534568116513393,
"grad_norm": 3.507041128614219e-06,
"learning_rate": 5.181500802027309e-09,
"loss": 0.0,
"step": 78600
},
{
"epoch": 9.546698610300306,
"grad_norm": 3.092974566243356e-06,
"learning_rate": 5.046706296251364e-09,
"loss": 0.0,
"step": 78700
},
{
"epoch": 9.558829104087218,
"grad_norm": 1.3427334124571644e-05,
"learning_rate": 4.911911790475419e-09,
"loss": 0.0,
"step": 78800
},
{
"epoch": 9.57095959787413,
"grad_norm": 1.3484379451256245e-05,
"learning_rate": 4.777117284699476e-09,
"loss": 0.0,
"step": 78900
},
{
"epoch": 9.583090091661044,
"grad_norm": 5.258711098576896e-06,
"learning_rate": 4.642322778923531e-09,
"loss": 0.0,
"step": 79000
},
{
"epoch": 9.595220585447956,
"grad_norm": 3.5142606975568924e-06,
"learning_rate": 4.5075282731475864e-09,
"loss": 0.0,
"step": 79100
},
{
"epoch": 9.607351079234869,
"grad_norm": 6.090160241001286e-06,
"learning_rate": 4.3740817124294016e-09,
"loss": 0.0,
"step": 79200
},
{
"epoch": 9.619481573021782,
"grad_norm": 4.854014150623698e-06,
"learning_rate": 4.239287206653457e-09,
"loss": 0.0,
"step": 79300
},
{
"epoch": 9.631612066808694,
"grad_norm": 1.0469612789165694e-05,
"learning_rate": 4.104492700877512e-09,
"loss": 0.0,
"step": 79400
},
{
"epoch": 9.643742560595607,
"grad_norm": 9.023720849654637e-06,
"learning_rate": 3.969698195101567e-09,
"loss": 0.0,
"step": 79500
},
{
"epoch": 9.65587305438252,
"grad_norm": 5.221518676989945e-06,
"learning_rate": 3.834903689325623e-09,
"loss": 0.0,
"step": 79600
},
{
"epoch": 9.668003548169432,
"grad_norm": 4.774324224854354e-06,
"learning_rate": 3.700109183549678e-09,
"loss": 0.0,
"step": 79700
},
{
"epoch": 9.680134041956345,
"grad_norm": 7.310536147997482e-06,
"learning_rate": 3.5653146777737337e-09,
"loss": 0.0,
"step": 79800
},
{
"epoch": 9.692264535743258,
"grad_norm": 8.2230781117687e-06,
"learning_rate": 3.430520171997789e-09,
"loss": 0.0,
"step": 79900
},
{
"epoch": 9.70439502953017,
"grad_norm": 7.887729225330986e-06,
"learning_rate": 3.295725666221845e-09,
"loss": 0.0,
"step": 80000
},
{
"epoch": 9.70439502953017,
"eval_loss": 5.108978484713589e-08,
"eval_runtime": 13226.5082,
"eval_samples_per_second": 31.911,
"eval_steps_per_second": 3.989,
"step": 80000
}
],
"logging_steps": 100,
"max_steps": 82430,
"num_input_tokens_seen": 0,
"num_train_epochs": 10,
"save_steps": 10000,
"stateful_callbacks": {
"EarlyStoppingCallback": {
"args": {
"early_stopping_patience": 3,
"early_stopping_threshold": 0.0
},
"attributes": {
"early_stopping_patience_counter": 0
}
},
"TrainerControl": {
"args": {
"should_epoch_stop": false,
"should_evaluate": false,
"should_log": false,
"should_save": true,
"should_training_stop": false
},
"attributes": {}
}
},
"total_flos": 2.0401765812729692e+19,
"train_batch_size": 16,
"trial_name": null,
"trial_params": null
}